In [86]:
# %pip install datasets -q
# %pip install transformers -q
# %pip install torch -q
# %pip install seqeval -q
# %pip install evaluate -q
# %pip install accelerate -q

In [87]:
def decode(words, labels, label_names):
	"""Render tokens and their tag names as two column-aligned text lines.

	words: the tokens of one sentence.
	labels: one integer label per token, used as an index into label_names.
	label_names: sequence mapping a label id to its tag string.

	Returns a (tokens_line, tags_line) tuple. Each column is as wide as the
	longer of the token and its tag name, plus one separating space.
	"""
	token_cells = []
	tag_cells = []
	for word, label in zip(words, labels):
		tag = label_names[label]
		# the extra character is the space between neighbouring columns
		width = max(len(word), len(tag)) + 1
		token_cells.append(word.ljust(width))
		tag_cells.append(tag.ljust(width))

	return ''.join(token_cells), ''.join(tag_cells)


def align_labels_with_tokens(labels, word_ids):
	"""Expand word-level labels to one label per tokenizer sub-token.

	labels: integer label of each word.
	word_ids: for each sub-token, the index of the word it came from, or
		None for a token that belongs to no word.

	Returns a list as long as word_ids: -100 for None entries, the word's
	label for the first sub-token of a word, and for later sub-tokens of
	the same word the label with odd ids raised by one (B-XXX -> I-XXX).
	"""
	aligned = []
	previous_word = None
	for word_id in word_ids:
		if word_id is None:
			# Special token
			aligned.append(-100)
		elif word_id != previous_word:
			# First sub-token of a new word keeps the word's label
			aligned.append(labels[word_id])
		else:
			# Same word as previous token: a B-XXX label (odd id) becomes
			# the I-XXX label stored at the next id
			label = labels[word_id]
			aligned.append(label + 1 if label % 2 == 1 else label)
		previous_word = word_id

	return aligned


def tokenize_and_align_labels(examples, tokenizer):
	"""Tokenize a batch of pre-split sentences and attach aligned labels.

	examples: batch with a 'tokens' entry (one word list per sentence) and
		an 'ner_tags' entry (one label list per sentence).
	tokenizer: callable applied to the word lists with truncation enabled;
		its result must offer word_ids(i) and item assignment.

	Returns the tokenizer output with an added 'labels' entry holding, per
	sentence, the labels produced by align_labels_with_tokens.
	"""
	encoded = tokenizer(
		examples['tokens'], truncation=True, is_split_into_words=True
	)
	encoded['labels'] = [
		align_labels_with_tokens(sentence_labels, encoded.word_ids(position))
		for position, sentence_labels in enumerate(examples['ner_tags'])
	]
	return encoded

from datasets import ClassLabel, Dataset, Features, Sequence, Value

# Tag names listed in id order: the position of a tag is its integer id.
tag_to_id = {
	tag: position
	for position, tag in enumerate(
		[
			'O',
			'B-PER',
			'I-PER',
			'B-ORG',
			'I-ORG',
			'B-LOC',
			'I-LOC',
			'B-MISC',
			'I-MISC',
			'B-POK',
			'I-POK',
		]
	)
}
# Reverse lookup from integer id to tag name.
id_to_tag = {position: tag for tag, position in tag_to_id.items()}


def iob2_to_dataset(fp):
	"""Converts an iob2 file to a huggingface dataset.

	fp: path to the iob2 file. Lines starting with '#' are skipped, blank
		or whitespace-only lines separate sentences, and every other line
		must hold exactly two whitespace-separated columns: token and tag.

	Returns a Dataset with one row per non-empty sentence and the columns
	'id' (running sentence number as a string), 'tokens', 'ner_tags',
	'ner_tags_id' and 'index' (position of each token in its sentence).

	Raises ValueError for a line that does not have exactly two columns or
	whose tag is not a key of tag_to_id.
	"""
	sentence_columns = ('tokens', 'ner_tags', 'ner_tags_id', 'index')
	data = {column: [] for column in sentence_columns}
	data['id'] = []
	current = {column: [] for column in sentence_columns}

	def flush_sentence():
		# Move the sentence collected so far into `data`. Nothing is stored
		# when no token was collected, so leading, trailing or repeated
		# blank lines no longer create empty sentences.
		if not current['tokens']:
			return
		for column in sentence_columns:
			data[column].append(current[column])
			current[column] = []
		data['id'].append(str(len(data['id'])))

	with open(fp, encoding='utf-8') as f:
		for line_number, line in enumerate(f, start=1):
			if line.startswith('#'):
				continue
			if not line.strip():  # new sentence
				flush_sentence()
				continue

			try:
				word, ner_tag = line.split()
			except ValueError as err:
				raise ValueError(f'Invalid line: {line} at line {line_number}') from err
			if ner_tag not in tag_to_id:
				raise ValueError(
					f'Invalid tag: {ner_tag}. Valid tags are: {list(tag_to_id.keys())}'
				)
			# the index of a token is its position inside the current sentence
			current['index'].append(len(current['tokens']))
			current['tokens'].append(word)
			current['ner_tags'].append(ner_tag)
			current['ner_tags_id'].append(tag_to_id[ner_tag])
	# the file may not end in a blank line, so flush the last sentence
	flush_sentence()

	features = Features(
		{
			'id': Value('string'),
			'tokens': Sequence(Value('string')),
			'ner_tags': Sequence(ClassLabel(names=list(tag_to_id.keys()))),
			'ner_tags_id': Sequence(Value('int32')),
			'index': Sequence(Value('int32')),
		}
	)
	dataset_raw = Dataset.from_dict(data, features=features)
	return dataset_raw

In [88]:
# from datasetutils import decode
# from iob2converter import iob2_to_dataset
from transformers import AutoModelForTokenClassification

In [89]:
# A single IOB2 file from the German folder, given as a relative path.
file_path = '../data/TaggedSeparated/german/0.iob2'

de_ds = iob2_to_dataset(file_path)

# NOTE(review): despite the `_fr` suffix this is the 'ner_tags' feature of the German dataset.
ner_feature_fr = de_ds.features['ner_tags']
# Tag names in id order; later cells read label_names when decoding, sizing models and computing metrics.
label_names = ner_feature_fr.feature.names
print(label_names)

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC', 'B-POK', 'I-POK']


In [90]:
# Show the first row of the dataset with each token stacked above its tag name.
first_sentence = de_ds[0]
words = first_sentence['tokens']
labels = first_sentence['ner_tags']
print(*decode(words, labels, label_names), sep='\n')

Ash   Pikachu und der Rest der Gang sehen ihre größte Herausforderung entgegen als zwei hinterlistige Diebinnen den geheimnisvollsten und gefährlichsten aller Kristalle Herztropfen rauben wollen Fällt er in ihre Hände ist die Zerstörung der Wasserstadt Altomare unvermeidbar Es beginnt ein atemberaubendes Rennen gegen die Zeit bei dem die letzte Hoffnung auf Latios und Latias ruht die als Hüter des Kristalls mit magischen Kräften ausgestattet sind 
B-PER B-POK   O   O   O    O   O    O     O    O      O               O        O   O    O             O         O   O                 O   O              O     O         O           O      O      O     O  O  O    O     O   O   O          O   B-LOC       I-LOC    O            O  O       O   O               O      O     O   O    O   O   O   O      O        O   B-POK  O   B-POK  O    O   O   O     O   O         O   O         O       O            O    


In [91]:
import torch
# Use the first CUDA device when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

'cpu'

In [92]:
from transformers import AutoTokenizer

# Checkpoint name passed to from_pretrained for the tokenizer here and for the models in later cells.
model_id = 'google-bert/bert-base-multilingual-cased'
tokenizer = AutoTokenizer.from_pretrained(model_id)



In [93]:
def tokenize_and_align_labels(examples):
    """
    Tokenize a batch of pre-split sentences with the notebook-level `tokenizer`
    and attach one label per sub-token.

    examples: batch with "tokens" (one word list per sentence) and "ner_tags"
        (one label list per sentence), as passed by Dataset.map(batched=True).

    Every sentence is truncated or padded to exactly 128 tokens. Returns the
    tokenizer output with an added "labels" entry built by
    align_labels_with_tokens.
    """
    # No return_tensors / .to(device) here: Dataset.map stores plain values,
    # so tensors created at this point would only be copied to the accelerator
    # and converted straight back. The Trainer moves each batch to the model's
    # device itself.
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        padding="max_length",
        max_length=128,
        is_split_into_words=True,
    )

    tokenized_inputs["labels"] = [
        align_labels_with_tokens(labels, tokenized_inputs.word_ids(batch_index=i))
        for i, labels in enumerate(examples["ner_tags"])
    ]

    return tokenized_inputs

def align_labels_with_tokens(labels, word_ids):
    """
    Spread word-level labels over the sub-tokens produced by the tokenizer.
    - Tokens whose word id is None get `-100` so they are ignored during training.
    - The first sub-token of a word keeps the word's label.
    - For later sub-tokens of the same word, a B-XXX label (odd id) becomes I-XXX (next id).
    """
    word_ids = list(word_ids)
    aligned = []
    # Pair every word id with the one before it; the first token has no predecessor.
    for previous_id, word_id in zip([None] + word_ids, word_ids):
        if word_id is None:
            aligned.append(-100)
            continue
        label = labels[word_id]
        # Convert B-XXX to I-XXX for sub-tokens
        if word_id == previous_id and label % 2 == 1:
            label += 1
        aligned.append(label)

    return aligned

In [94]:
# Token-classification model with one output label per entry of label_names.
# NOTE(review): a later cell rebinds `model` with id2label/label2id set, so this instance looks redundant — confirm before removing.
model = AutoModelForTokenClassification.from_pretrained(
    model_id, num_labels=len(label_names)
).to(device)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [95]:
# Apply tokenize_and_align_labels to the German sample in batches; the result gains the tokenizer columns and 'labels'.
tokenized_ds = de_ds.map(tokenize_and_align_labels, batched=True)

print(tokenized_ds)

Map: 100%|██████████| 1/1 [00:00<00:00, 285.17 examples/s]

Dataset({
    features: ['tokens', 'ner_tags', 'ner_tags_id', 'index', 'id', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 1
})





In [96]:
import evaluate

# Metric object used by compute_metrics, which calls metric.compute on lists of tag names.
metric = evaluate.load("seqeval")

In [97]:
# Rob span-f1 

def toSpans(tags):
    """Collect the entity spans encoded by a sequence of IOB2 tags.

    tags: sequence of tag strings such as 'B-PER', 'I-PER' or 'O'.

    Returns a set of strings 'BEG-END:TYPE'. BEG is the index of the B- tag,
    END is the index one past the last tag of the span (exclusive) and TYPE is
    the text after the 'B-' prefix. A span covers every I- tag that directly
    follows its B- tag; the type of those I- tags is not compared.
    """
    spans = set()
    for beg, tag in enumerate(tags):
        if not tag.startswith('B'):
            continue
        end = beg + 1
        while end < len(tags) and tags[end].startswith('I'):
            end += 1
        # `end` is exclusive for every span, including one that touches the end
        # of the sequence. Reusing the loop variable of a `for ... break` gave
        # such spans an inclusive end, so ['B-PER', 'I-PER'] and ['B-PER', 'O']
        # produced the same span string.
        spans.add(str(beg) + '-' + str(end) + ':' + tag[2:])
    return spans


def getInstanceScores(predSpans, goldSpans):
    """Return the F1 score of a set of predicted spans against a set of gold spans.

    predSpans, goldSpans: sets of span identifiers; a prediction counts as
        correct when the identical identifier is also in the gold set.

    Precision and recall are 0.0 when their denominator is zero, and the F1
    score is 0.0 when both are zero.
    """
    matched = len(goldSpans.intersection(predSpans))
    predicted = len(predSpans)  # true positives + false positives
    expected = len(goldSpans)   # true positives + false negatives

    prec = matched / predicted if predicted != 0 else 0.0
    rec = matched / expected if expected != 0 else 0.0
    if prec + rec == 0.0:
        return 0.0
    return 2 * (prec * rec) / (prec + rec)


In [98]:
import numpy as np


def compute_metrics(eval_preds):
    """Turn model outputs into seqeval scores plus an exact-match span F1.

    eval_preds: pair (logits, labels). The predicted class of a token is the
        argmax over the last axis of logits; positions whose label is -100
        are excluded from both predictions and references.

    Returns a dict with "precision", "recall", "f1" and "accuracy" taken from
    the seqeval metric, and "span_f1" computed with toSpans/getInstanceScores
    over the spans of all sentences.
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)

    # Score the spans of every sentence, not only of sentence 0. Each span is
    # prefixed with its sentence number so equal positions in different
    # sentences stay distinct when the sets are intersected.
    pred_spans = {
        f"{sentence}:{span}"
        for sentence, tags in enumerate(true_predictions)
        for span in toSpans(tags)
    }
    true_spans = {
        f"{sentence}:{span}"
        for sentence, tags in enumerate(true_labels)
        for span in toSpans(tags)
    }
    score = getInstanceScores(pred_spans, true_spans)

    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
        "span_f1": score
    }

In [99]:
from transformers import AutoModelForTokenClassification
# Model whose config also records the mapping between label ids and tag names.
model = AutoModelForTokenClassification.from_pretrained(
    model_id,
    num_labels=len(label_names),
    id2label=dict(enumerate(label_names)),
    label2id={name: index for index, name in enumerate(label_names)},
).to(device)
model.config.num_labels

Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


11

In [100]:
from transformers import TrainingArguments
from transformers import Trainer


# Training configuration object; the same `args` instance is passed to every Trainer built below.
# Evaluation and checkpoint saving are both set to the "epoch" strategy.
args = TrainingArguments(
    "mbert-finetuned-ner",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    # remove_unused_columns=False
)

In [101]:
# trainer = Trainer(
#     model=model,
#     args=args,
#     train_dataset=tokenized_ds["train"],
#     eval_dataset=tokenized_ds["validation"],
#     compute_metrics=compute_metrics,
# )

# trainer.train()


In [102]:
from datasets import ClassLabel, Dataset, Features, Sequence, Value

# Tag name -> integer id; the B- and I- tag of an entity type sit on neighbouring ids.
tag_to_id = dict(
	[
		('O', 0),
		('B-PER', 1), ('I-PER', 2),
		('B-ORG', 3), ('I-ORG', 4),
		('B-LOC', 5), ('I-LOC', 6),
		('B-MISC', 7), ('I-MISC', 8),
		('B-POK', 9), ('I-POK', 10),
	]
)
# Integer id -> tag name.
id_to_tag = dict(zip(tag_to_id.values(), tag_to_id.keys()))


def iob2_to_dataset(fp):
	"""Converts an iob2 file to a huggingface dataset.

	fp: path to the iob2 file. Lines starting with '#' are skipped, blank
		or whitespace-only lines separate sentences, and every other line
		must hold exactly two whitespace-separated columns: token and tag.

	Returns a Dataset with one row per non-empty sentence and the columns
	'id' (running sentence number as a string), 'tokens', 'ner_tags',
	'ner_tags_id' and 'index' (position of each token in its sentence).

	Raises ValueError for a line that does not have exactly two columns or
	whose tag is not a key of tag_to_id.
	"""
	sentence_columns = ('tokens', 'ner_tags', 'ner_tags_id', 'index')
	data = {column: [] for column in sentence_columns}
	data['id'] = []
	current = {column: [] for column in sentence_columns}

	def flush_sentence():
		# Move the sentence collected so far into `data`. Nothing is stored
		# when no token was collected, so leading, trailing or repeated
		# blank lines no longer create empty sentences.
		if not current['tokens']:
			return
		for column in sentence_columns:
			data[column].append(current[column])
			current[column] = []
		data['id'].append(str(len(data['id'])))

	with open(fp, encoding='utf-8') as f:
		for line_number, line in enumerate(f, start=1):
			if line.startswith('#'):
				continue
			if not line.strip():  # new sentence
				flush_sentence()
				continue

			try:
				word, ner_tag = line.split()
			except ValueError as err:
				raise ValueError(f'Invalid line: {line} at line {line_number}') from err
			if ner_tag not in tag_to_id:
				raise ValueError(
					f'Invalid tag: {ner_tag}. Valid tags are: {list(tag_to_id.keys())}'
				)
			# the index of a token is its position inside the current sentence
			current['index'].append(len(current['tokens']))
			current['tokens'].append(word)
			current['ner_tags'].append(ner_tag)
			current['ner_tags_id'].append(tag_to_id[ner_tag])
	# the file may not end in a blank line, so flush the last sentence
	flush_sentence()

	features = Features(
		{
			'id': Value('string'),
			'tokens': Sequence(Value('string')),
			'ner_tags': Sequence(ClassLabel(names=list(tag_to_id.keys()))),
			'ner_tags_id': Sequence(Value('int32')),
			'index': Sequence(Value('int32')),
		}
	)
	dataset_raw = Dataset.from_dict(data, features=features)
	return dataset_raw

In [103]:
from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value

# Tag names paired with the ids 0..10 in listing order.
tag_to_id = dict(
	zip(
		('O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC', 'B-POK', 'I-POK'),
		range(11),
	)
)
# Inverse mapping from id back to tag name.
id_to_tag = {number: name for name, number in tag_to_id.items()}

def iob2s_to_datasets(file_paths, reference_path):
    """
    Converts IOB2 files into a DatasetDict with one train split and three validation splits.

    file_paths: iterable of IOB2 file paths whose sentences are concatenated into the "train" split.
        Each non-blank line must hold a token and a tag separated by whitespace, and sentences
        are separated by blank lines.
    reference_path: sequence of three IOB2 paths read by position: index 0 becomes "val_de",
        index 1 becomes "val_fr" and index 2 becomes "val_en". They are loaded with iob2_to_dataset.

    Raises ValueError for a training line without exactly two columns and KeyError for a
    training tag that is not a key of tag_to_id.
    """
    sentences, sentence_tags = [], []

    for file_path in file_paths:
        # Start every file with fresh buffers. The lists appended to `sentences`
        # are stored by reference, so reusing the buffers of the previous file
        # would extend (and later duplicate) its last sentence.
        tokens, ner_tags = [], []
        with open(file_path, 'r', encoding='utf-8') as f:
            for i, line in enumerate(f):
                line = line.strip()
                if not line:
                    if tokens and ner_tags:
                        sentences.append(tokens)
                        sentence_tags.append(ner_tags)
                    tokens, ner_tags = [], []
                else:
                    try:
                        word, tag = line.split()
                    except ValueError as err:
                        raise ValueError(f"Each line must have two columns: ({i}) {line}") from err
                    tokens.append(word)
                    ner_tags.append(tag)

        # the file may not end in a blank line, so keep its last sentence
        if tokens and ner_tags:
            sentences.append(tokens)
            sentence_tags.append(ner_tags)

    label_list = list(tag_to_id.keys())
    label_mapping = {label: i for i, label in enumerate(label_list)}

    indexed_tags = [[label_mapping[tag] for tag in tags] for tags in sentence_tags]
    dataset = Dataset.from_dict({"tokens": sentences, "ner_tags": indexed_tags})
    # Only the two columns shared with the train split are kept for the references.
    unused_columns = ["ner_tags_id", "index", "id"]
    reference_german = iob2_to_dataset(reference_path[0]).remove_columns(unused_columns)
    reference_french = iob2_to_dataset(reference_path[1]).remove_columns(unused_columns)
    reference_english = iob2_to_dataset(reference_path[2]).remove_columns(unused_columns)

    features = Features({
        "tokens": Sequence(Value("string")),
        "ner_tags": Sequence(ClassLabel(names=label_list))
    })

    datasets = DatasetDict({
        "train": dataset.cast(features),
        "val_de": reference_german.cast(features),
        "val_fr": reference_french.cast(features),
        "val_en": reference_english.cast(features),
    })

    return datasets

In [104]:
import os

def baseline_res(referenceFiles):
    """Fine-tune on one randomly chosen English file and score the result on the three reference texts.

    referenceFiles: mapping with the keys "de", "fr" and "en" whose values are the
        paths of the German, French and English reference IOB2 files.

    Returns the tuple (res_de, res_fr, res_en) holding the metrics reported by
    trainer.predict for the "val_de", "val_fr" and "val_en" splits.
    """
    data_dir = '../data/TaggedSeparated/' + "english"
    files = os.listdir(data_dir)

    train_files = np.random.choice(files, 1, replace=False)
    train_files_paths = [data_dir + '/' + f for f in train_files]
    # iob2s_to_datasets reads the reference paths by position (German, French,
    # English). Looking them up by key keeps each split on its own language no
    # matter in which order the mapping was written.
    reference_paths = [referenceFiles[key] for key in ("de", "fr", "en")]
    datasets = iob2s_to_datasets(train_files_paths, reference_paths)
    tokenized_ds = datasets.map(tokenize_and_align_labels, batched=True)

    model = AutoModelForTokenClassification.from_pretrained(
        model_id, num_labels=len(label_names)
    ).to(device)

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tokenized_ds["train"],
        eval_dataset=tokenized_ds["val_" + "fr"],
        compute_metrics=compute_metrics,
    )

    trainer.train()

    res_de = trainer.predict(tokenized_ds["val_de"]).metrics
    res_fr = trainer.predict(tokenized_ds["val_fr"]).metrics
    res_en = trainer.predict(tokenized_ds["val_en"]).metrics
    return res_de, res_fr, res_en

In [105]:
import os
import pandas as pd
import numpy as np

llang = {
    "fr": "french",
    "en": "english",
    "de": "german"
}

# - Load the iob2 files
# - For each language:
#   - For i = 1..len(files):
#     - 5 times:
#       - Choose i random files and concat them.
#       - Train the model on the concatenated files.
#       - Evaluate the model on reference texts.
#     - Average the results and store them in a dataframe.
# - Save the dataframe as a tsv file.

ITERATIONS = 5
FILE_SAMPLING_SEED = 0

# Listed as German, French, English: iob2s_to_datasets reads its reference
# paths in exactly this order, so list(referenceFiles.values()) is also correct.
referenceFiles = {
    "de": '../ReferenceText/ReferenceTextGerman.iob2',
    "fr": '../ReferenceText/ReferenceTextFrench.iob2',
    "en": '../ReferenceText/ReferenceTextEnglish.iob2'
}

TEST_LANGS = ("de", "fr", "en")
METRIC_NAMES = ["precision", "recall", "f1", "accuracy", "span_f1"]
reference_paths = [referenceFiles[test_lang] for test_lang in TEST_LANGS]

# Files are sampled from a dedicated generator. Constructing a Trainer re-seeds
# NumPy's global RNG from args.seed, so np.random.choice would return the same
# selection in every iteration and the averaged runs would be identical.
file_rng = np.random.default_rng(FILE_SAMPLING_SEED)

# One list per result row, in the order the results are produced.
result_rows = []

de, fr, en = baseline_res(referenceFiles)
for test_lang, res in zip(TEST_LANGS, (de, fr, en)):
    result_rows.append(["baseline", "0", test_lang] + [res["test_" + name] for name in METRIC_NAMES])

for lang, language in llang.items():
    data_dir = '../data/TaggedSeparated/' + language
    # sorted: os.listdir has no guaranteed order, which would defeat the fixed sampling seed
    files = sorted(os.listdir(data_dir))
    for i in range(len(files)):
        run_rows = []

        for it in range(ITERATIONS):
            train_files = file_rng.choice(files, i+1, replace=False)
            train_files_paths = [data_dir + '/' + f for f in train_files]
            datasets = iob2s_to_datasets(train_files_paths, reference_paths)
            tokenized_ds = datasets.map(tokenize_and_align_labels, batched=True)

            model = AutoModelForTokenClassification.from_pretrained(
                model_id, num_labels=len(label_names)
            ).to(device)

            train_dataset = tokenized_ds["train"]
            eval_dataset = tokenized_ds["val_" + lang]

            trainer = Trainer(
                model=model,
                args=args,
                train_dataset=train_dataset,
                eval_dataset=eval_dataset,
                compute_metrics=compute_metrics,
            )

            trainer.train()

            for test_lang in TEST_LANGS:
                res = trainer.predict(tokenized_ds["val_" + test_lang]).metrics
                run_rows.append([test_lang] + [res["test_" + name] for name in METRIC_NAMES])

        # group by lang and average
        df = pd.DataFrame(run_rows, columns=["test_lang"] + METRIC_NAMES).groupby("test_lang").mean()
        # add to parent rows
        for test_lang in TEST_LANGS:
            result_rows.append([language, i+1, test_lang] + [df.loc[test_lang, name] for name in METRIC_NAMES])

# Rows used to be prepended one at a time, so the saved table lists the newest
# result first; reversing once keeps that row order.
main_df = pd.DataFrame(
    result_rows[::-1],
    columns=["train_lang", "num_train_files", "test_lang"] + METRIC_NAMES,
)

main_df.to_csv("results.tsv", sep="\t", index=False)

Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 397.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 224.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _warn_prf(average, modifier, msg_start, len(result))
                                             
 33%|███▎      | 1/3 [00:02<00:04,  2.00s/it]

{'eval_loss': 1.7161171436309814, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.957, 'eval_steps_per_second': 11.957, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
                                             
 67%|██████▋   | 2/3 [00:06<00:03,  3.28s/it]

{'eval_loss': 1.315163254737854, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0907, 'eval_samples_per_second': 11.031, 'eval_steps_per_second': 11.031, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
                                             
100%|██████████| 3/3 [00:10<00:00,  3.49s/it]

{'eval_loss': 1.1350722312927246, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.794, 'eval_steps_per_second': 10.794, 'epoch': 3.0}


100%|██████████| 3/3 [00:11<00:00,  3.73s/it]


{'train_runtime': 11.1833, 'train_samples_per_second': 0.268, 'train_steps_per_second': 0.268, 'train_loss': 1.677202542622884, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 83.17it/s]
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.78it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized

{'eval_loss': 2.2774741649627686, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.18253968253968253, 'eval_span_f1': 0.0, 'eval_runtime': 0.0962, 'eval_samples_per_second': 10.398, 'eval_steps_per_second': 10.398, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.65s/it]

{'eval_loss': 1.9968950748443604, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.1297, 'eval_samples_per_second': 7.713, 'eval_steps_per_second': 7.713, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.90s/it]

{'eval_loss': 1.8676574230194092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6904761904761905, 'eval_span_f1': 0.0, 'eval_runtime': 0.1112, 'eval_samples_per_second': 8.996, 'eval_steps_per_second': 8.996, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.26s/it]


{'train_runtime': 6.7685, 'train_samples_per_second': 0.443, 'train_steps_per_second': 0.443, 'train_loss': 1.8140039443969727, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 79.92it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
100%|██████████| 1/1 [00:00<00:00, 86.90it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.2774741649627686, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.18253968253968253, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.217, 'eval_steps_per_second': 11.217, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.65s/it]

{'eval_loss': 1.9968950748443604, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0912, 'eval_samples_per_second': 10.971, 'eval_steps_per_second': 10.971, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.92s/it]

{'eval_loss': 1.8676574230194092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6904761904761905, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.411, 'eval_steps_per_second': 11.411, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.24s/it]


{'train_runtime': 6.7208, 'train_samples_per_second': 0.446, 'train_steps_per_second': 0.446, 'train_loss': 1.8140039443969727, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.2774741649627686, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.18253968253968253, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.679, 'eval_steps_per_second': 11.679, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.54s/it]

{'eval_loss': 1.9968950748443604, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.483, 'eval_steps_per_second': 11.483, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.81s/it]

{'eval_loss': 1.8676574230194092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6904761904761905, 'eval_span_f1': 0.0, 'eval_runtime': 0.1116, 'eval_samples_per_second': 8.958, 'eval_steps_per_second': 8.958, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.18s/it]


{'train_runtime': 6.5254, 'train_samples_per_second': 0.46, 'train_steps_per_second': 0.46, 'train_loss': 1.8140039443969727, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 181.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably 

{'eval_loss': 2.2774741649627686, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.18253968253968253, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.609, 'eval_steps_per_second': 11.609, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.54s/it]

{'eval_loss': 1.9968950748443604, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.543, 'eval_steps_per_second': 11.543, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:04<00:00,  1.77s/it]

{'eval_loss': 1.8676574230194092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6904761904761905, 'eval_span_f1': 0.0, 'eval_runtime': 0.0966, 'eval_samples_per_second': 10.348, 'eval_steps_per_second': 10.348, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.09s/it]


{'train_runtime': 6.2653, 'train_samples_per_second': 0.479, 'train_steps_per_second': 0.479, 'train_loss': 1.8140039443969727, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 133.12it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 399.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.2774741649627686, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.18253968253968253, 'eval_span_f1': 0.0, 'eval_runtime': 0.1007, 'eval_samples_per_second': 9.935, 'eval_steps_per_second': 9.935, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.63s/it]

{'eval_loss': 1.9968950748443604, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.744, 'eval_steps_per_second': 11.744, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.85s/it]

{'eval_loss': 1.8676574230194092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6904761904761905, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.281, 'eval_steps_per_second': 11.281, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.21s/it]


{'train_runtime': 6.63, 'train_samples_per_second': 0.452, 'train_steps_per_second': 0.452, 'train_loss': 1.8140039443969727, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.30it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 1329.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.074902296066284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0916, 'eval_samples_per_second': 10.913, 'eval_steps_per_second': 10.913, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.75s/it]

{'eval_loss': 1.8148616552352905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7301587301587301, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.737, 'eval_steps_per_second': 10.737, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.98s/it]

{'eval_loss': 1.6551257371902466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8174603174603174, 'eval_span_f1': 0.0, 'eval_runtime': 0.0932, 'eval_samples_per_second': 10.734, 'eval_steps_per_second': 10.734, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.32s/it]


{'train_runtime': 6.9633, 'train_samples_per_second': 0.862, 'train_steps_per_second': 0.431, 'train_loss': 2.045081297556559, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 76.82it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.85 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 400.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized

{'eval_loss': 2.074902296066284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.79s/it]

{'eval_loss': 1.8148616552352905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7301587301587301, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.479, 'eval_steps_per_second': 11.479, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.07s/it]

{'eval_loss': 1.6551257371902466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8174603174603174, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.745, 'eval_steps_per_second': 11.745, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.42s/it]


{'train_runtime': 7.2724, 'train_samples_per_second': 0.825, 'train_steps_per_second': 0.413, 'train_loss': 2.045081297556559, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.66it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.41it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 796.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.54 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 2.074902296066284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.102, 'eval_steps_per_second': 12.102, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.83s/it]

{'eval_loss': 1.8148616552352905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7301587301587301, 'eval_span_f1': 0.0, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.54, 'eval_steps_per_second': 11.54, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.10s/it]

{'eval_loss': 1.6551257371902466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8174603174603174, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.679, 'eval_steps_per_second': 10.679, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.45s/it]


{'train_runtime': 7.3414, 'train_samples_per_second': 0.817, 'train_steps_per_second': 0.409, 'train_loss': 2.045081297556559, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 166.34it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 1990.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 2.074902296066284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.78s/it]

{'eval_loss': 1.8148616552352905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7301587301587301, 'eval_span_f1': 0.0, 'eval_runtime': 0.0937, 'eval_samples_per_second': 10.675, 'eval_steps_per_second': 10.675, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.03s/it]

{'eval_loss': 1.6551257371902466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8174603174603174, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.744, 'eval_steps_per_second': 11.744, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.35s/it]


{'train_runtime': 7.0612, 'train_samples_per_second': 0.85, 'train_steps_per_second': 0.425, 'train_loss': 2.045081297556559, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
100%|██████████| 1/1 [00:00<00:00, 153.78it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 396.14 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 499.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['cla

{'eval_loss': 2.074902296066284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.47619047619047616, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.477, 'eval_steps_per_second': 11.477, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.82s/it]

{'eval_loss': 1.8148616552352905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7301587301587301, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.707, 'eval_steps_per_second': 11.707, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.10s/it]

{'eval_loss': 1.6551257371902466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8174603174603174, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.542, 'eval_steps_per_second': 11.542, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.44s/it]


{'train_runtime': 7.3092, 'train_samples_per_second': 0.821, 'train_steps_per_second': 0.41, 'train_loss': 2.045081297556559, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 3000.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 545.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 2.102031946182251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.219, 'eval_steps_per_second': 11.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:02,  2.03s/it]

{'eval_loss': 1.6958622932434082, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0967, 'eval_samples_per_second': 10.345, 'eval_steps_per_second': 10.345, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.28s/it]

{'eval_loss': 1.505822777748108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.1002, 'eval_samples_per_second': 9.985, 'eval_steps_per_second': 9.985, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.65s/it]


{'train_runtime': 7.9632, 'train_samples_per_second': 1.13, 'train_steps_per_second': 0.377, 'train_loss': 1.9825550715128581, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
100%|██████████| 1/1 [00:00<00:00, 166.33it/s]
100%|██████████| 1/1 [00:00<00:00, 60.56it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 2998.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 992.97 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 599.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.102031946182251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.343, 'eval_steps_per_second': 11.343, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:02,  2.04s/it]

{'eval_loss': 1.6958622932434082, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.718, 'eval_steps_per_second': 12.718, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.31s/it]

{'eval_loss': 1.505822777748108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.325, 'eval_steps_per_second': 12.325, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.63s/it]


{'train_runtime': 7.8853, 'train_samples_per_second': 1.141, 'train_steps_per_second': 0.38, 'train_loss': 1.9825550715128581, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.49it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 1500.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 544.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.102031946182251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.888, 'eval_steps_per_second': 12.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.95s/it]

{'eval_loss': 1.6958622932434082, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.483, 'eval_steps_per_second': 12.483, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.15s/it]

{'eval_loss': 1.505822777748108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.749, 'eval_steps_per_second': 11.749, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.46s/it]


{'train_runtime': 7.3917, 'train_samples_per_second': 1.218, 'train_steps_per_second': 0.406, 'train_loss': 1.9825550715128581, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.52it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 3005.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 544.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably

{'eval_loss': 2.102031946182251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.331, 'eval_steps_per_second': 12.331, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  2.00s/it]

{'eval_loss': 1.6958622932434082, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.958, 'eval_steps_per_second': 11.958, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.28s/it]

{'eval_loss': 1.505822777748108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.527, 'eval_steps_per_second': 11.527, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.63s/it]


{'train_runtime': 7.8865, 'train_samples_per_second': 1.141, 'train_steps_per_second': 0.38, 'train_loss': 1.9825550715128581, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.09it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 153.82it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 1501.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 600.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.102031946182251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.288, 'eval_steps_per_second': 11.288, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.09s/it]

{'eval_loss': 1.6958622932434082, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.613, 'eval_steps_per_second': 11.613, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.26s/it]

{'eval_loss': 1.505822777748108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.614, 'eval_steps_per_second': 11.614, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.61s/it]


{'train_runtime': 7.8278, 'train_samples_per_second': 1.15, 'train_steps_per_second': 0.383, 'train_loss': 1.9825550715128581, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 1999.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.45 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 571.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 242.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1416709423065186, 'eval_precision': 0.016129032258064516, 'eval_recall': 0.1111111111111111, 'eval_f1': 0.028169014084507043, 'eval_accuracy': 0.3888888888888889, 'eval_span_f1': 0.05714285714285715, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.682, 'eval_steps_per_second': 11.682, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.18s/it]

{'eval_loss': 1.8228745460510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.1102, 'eval_samples_per_second': 9.077, 'eval_steps_per_second': 9.077, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.37s/it]

{'eval_loss': 1.56313955783844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.533, 'eval_steps_per_second': 11.533, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.70s/it]


{'train_runtime': 8.0873, 'train_samples_per_second': 1.484, 'train_steps_per_second': 0.371, 'train_loss': 2.1291834513346353, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.60it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4000.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 532.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.1416709423065186, 'eval_precision': 0.016129032258064516, 'eval_recall': 0.1111111111111111, 'eval_f1': 0.028169014084507043, 'eval_accuracy': 0.3888888888888889, 'eval_span_f1': 0.05714285714285715, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.562, 'eval_steps_per_second': 12.562, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.15s/it]

{'eval_loss': 1.8228745460510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.476, 'eval_steps_per_second': 11.476, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.38s/it]

{'eval_loss': 1.56313955783844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.16, 'eval_steps_per_second': 11.16, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.71s/it]


{'train_runtime': 8.1248, 'train_samples_per_second': 1.477, 'train_steps_per_second': 0.369, 'train_loss': 2.1291834513346353, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 1999.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 569.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 166.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1416709423065186, 'eval_precision': 0.016129032258064516, 'eval_recall': 0.1111111111111111, 'eval_f1': 0.028169014084507043, 'eval_accuracy': 0.3888888888888889, 'eval_span_f1': 0.05714285714285715, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.68, 'eval_steps_per_second': 11.68, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.22s/it]

{'eval_loss': 1.8228745460510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0956, 'eval_samples_per_second': 10.456, 'eval_steps_per_second': 10.456, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.54s/it]

{'eval_loss': 1.56313955783844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.1037, 'eval_samples_per_second': 9.644, 'eval_steps_per_second': 9.644, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.87s/it]


{'train_runtime': 8.6116, 'train_samples_per_second': 1.393, 'train_steps_per_second': 0.348, 'train_loss': 2.1291834513346353, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 166.38it/s]
100%|██████████| 1/1 [00:00<00:00, 153.75it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4001.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.47 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 614.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1416709423065186, 'eval_precision': 0.016129032258064516, 'eval_recall': 0.1111111111111111, 'eval_f1': 0.028169014084507043, 'eval_accuracy': 0.3888888888888889, 'eval_span_f1': 0.05714285714285715, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.1, 'eval_steps_per_second': 12.1, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.22s/it]

{'eval_loss': 1.8228745460510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0917, 'eval_samples_per_second': 10.91, 'eval_steps_per_second': 10.91, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.43s/it]

{'eval_loss': 1.56313955783844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0962, 'eval_samples_per_second': 10.398, 'eval_steps_per_second': 10.398, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.80s/it]


{'train_runtime': 8.4099, 'train_samples_per_second': 1.427, 'train_steps_per_second': 0.357, 'train_loss': 2.1291834513346353, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.79it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 1990.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 615.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1416709423065186, 'eval_precision': 0.016129032258064516, 'eval_recall': 0.1111111111111111, 'eval_f1': 0.028169014084507043, 'eval_accuracy': 0.3888888888888889, 'eval_span_f1': 0.05714285714285715, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.481, 'eval_steps_per_second': 11.481, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.17s/it]

{'eval_loss': 1.8228745460510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.281, 'eval_steps_per_second': 11.281, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.45s/it]

{'eval_loss': 1.56313955783844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0902, 'eval_samples_per_second': 11.091, 'eval_steps_per_second': 11.091, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.77s/it]


{'train_runtime': 8.3153, 'train_samples_per_second': 1.443, 'train_steps_per_second': 0.361, 'train_loss': 2.1291834513346353, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 5000.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 587.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1164660453796387, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.548, 'eval_steps_per_second': 11.548, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.36s/it]

{'eval_loss': 1.7120262384414673, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.1072, 'eval_samples_per_second': 9.328, 'eval_steps_per_second': 9.328, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.61s/it]

{'eval_loss': 1.5225865840911865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.095, 'eval_steps_per_second': 11.095, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.94s/it]


{'train_runtime': 8.8062, 'train_samples_per_second': 1.703, 'train_steps_per_second': 0.341, 'train_loss': 2.000351587931315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 133.26it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 3321.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.55 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 587.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1164660453796387, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.477, 'eval_steps_per_second': 11.477, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.23s/it]

{'eval_loss': 1.7120262384414673, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.744, 'eval_steps_per_second': 11.744, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.52s/it]

{'eval_loss': 1.5225865840911865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.103, 'eval_steps_per_second': 12.103, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.84s/it]


{'train_runtime': 8.5302, 'train_samples_per_second': 1.758, 'train_steps_per_second': 0.352, 'train_loss': 2.000351587931315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 9934.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 525.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1164660453796387, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.29s/it]

{'eval_loss': 1.7120262384414673, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.626, 'eval_steps_per_second': 10.626, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.52s/it]

{'eval_loss': 1.5225865840911865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.41, 'eval_steps_per_second': 11.41, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.86s/it]


{'train_runtime': 8.5643, 'train_samples_per_second': 1.751, 'train_steps_per_second': 0.35, 'train_loss': 2.000351587931315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 153.69it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 4987.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.14 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 623.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1164660453796387, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.1007, 'eval_samples_per_second': 9.934, 'eval_steps_per_second': 9.934, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.43s/it]

{'eval_loss': 1.7120262384414673, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.546, 'eval_steps_per_second': 11.546, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.62s/it]

{'eval_loss': 1.5225865840911865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.155, 'eval_steps_per_second': 11.155, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.96s/it]


{'train_runtime': 8.8909, 'train_samples_per_second': 1.687, 'train_steps_per_second': 0.337, 'train_loss': 2.000351587931315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
100%|██████████| 1/1 [00:00<00:00, 133.02it/s]
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 2496.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.44 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 587.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 318.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1164660453796387, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.401, 'eval_steps_per_second': 12.401, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.33s/it]

{'eval_loss': 1.7120262384414673, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.676, 'eval_steps_per_second': 11.676, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.57s/it]

{'eval_loss': 1.5225865840911865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.75, 'eval_steps_per_second': 11.75, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.88s/it]


{'train_runtime': 8.6318, 'train_samples_per_second': 1.738, 'train_steps_per_second': 0.348, 'train_loss': 2.000351587931315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.63it/s]
100%|██████████| 1/1 [00:00<00:00, 181.27it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 6010.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.85 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 570.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1072752475738525, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.973, 'eval_steps_per_second': 10.973, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.48s/it]

{'eval_loss': 1.7010184526443481, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.675, 'eval_steps_per_second': 11.675, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.72s/it]

{'eval_loss': 1.5117179155349731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.478, 'eval_steps_per_second': 11.478, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.06s/it]


{'train_runtime': 9.177, 'train_samples_per_second': 1.961, 'train_steps_per_second': 0.327, 'train_loss': 1.9867771466573079, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 3000.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.97 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 521.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1072752475738525, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.104, 'eval_steps_per_second': 12.104, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.43s/it]

{'eval_loss': 1.7010184526443481, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0942, 'eval_samples_per_second': 10.618, 'eval_steps_per_second': 10.618, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.73s/it]

{'eval_loss': 1.5117179155349731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0912, 'eval_samples_per_second': 10.97, 'eval_steps_per_second': 10.97, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.05s/it]


{'train_runtime': 9.1466, 'train_samples_per_second': 1.968, 'train_steps_per_second': 0.328, 'train_loss': 1.9867771466573079, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 181.48it/s]
100%|██████████| 1/1 [00:00<00:00, 166.43it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 6014.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 571.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1072752475738525, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.343, 'eval_steps_per_second': 11.343, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.51s/it]

{'eval_loss': 1.7010184526443481, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.476, 'eval_steps_per_second': 11.476, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.73s/it]

{'eval_loss': 1.5117179155349731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.349, 'eval_steps_per_second': 11.349, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.07s/it]


{'train_runtime': 9.1946, 'train_samples_per_second': 1.958, 'train_steps_per_second': 0.326, 'train_loss': 1.9867771466573079, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 133.05it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 3979.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 570.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.1072752475738525, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0951, 'eval_samples_per_second': 10.511, 'eval_steps_per_second': 10.511, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.68s/it]

{'eval_loss': 1.7010184526443481, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.102, 'eval_steps_per_second': 12.102, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.88s/it]

{'eval_loss': 1.5117179155349731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.743, 'eval_steps_per_second': 11.743, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.19s/it]


{'train_runtime': 9.5763, 'train_samples_per_second': 1.88, 'train_steps_per_second': 0.313, 'train_loss': 1.9867771466573079, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.62it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 2395.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 444.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1072752475738525, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.36507936507936506, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.887, 'eval_steps_per_second': 11.887, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.57s/it]

{'eval_loss': 1.7010184526443481, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.344, 'eval_steps_per_second': 11.344, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.75s/it]

{'eval_loss': 1.5117179155349731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.815, 'eval_steps_per_second': 11.815, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.07s/it]


{'train_runtime': 9.2069, 'train_samples_per_second': 1.955, 'train_steps_per_second': 0.326, 'train_loss': 1.9867771466573079, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3489.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 499.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.108888864517212, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0942, 'eval_samples_per_second': 10.621, 'eval_steps_per_second': 10.621, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.71s/it]

{'eval_loss': 1.7063590288162231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.00s/it]

{'eval_loss': 1.5179877281188965, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.1006, 'eval_samples_per_second': 9.935, 'eval_steps_per_second': 9.935, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.30s/it]


{'train_runtime': 9.9069, 'train_samples_per_second': 2.12, 'train_steps_per_second': 0.303, 'train_loss': 1.988181432088216, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.52it/s]
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3494.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 559.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.108888864517212, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.61, 'eval_steps_per_second': 11.61, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.67s/it]

{'eval_loss': 1.7063590288162231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.748, 'eval_steps_per_second': 11.748, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.87s/it]

{'eval_loss': 1.5179877281188965, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.82, 'eval_steps_per_second': 11.82, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.21s/it]


{'train_runtime': 9.6189, 'train_samples_per_second': 2.183, 'train_steps_per_second': 0.312, 'train_loss': 1.988181432088216, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.78it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.48it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 2790.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.61 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 607.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.108888864517212, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.561, 'eval_steps_per_second': 12.561, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.71s/it]

{'eval_loss': 1.7063590288162231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.977, 'eval_steps_per_second': 10.977, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.90s/it]

{'eval_loss': 1.5179877281188965, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0877, 'eval_samples_per_second': 11.401, 'eval_steps_per_second': 11.401, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.25s/it]


{'train_runtime': 9.7413, 'train_samples_per_second': 2.156, 'train_steps_per_second': 0.308, 'train_loss': 1.988181432088216, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 4649.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 608.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.108888864517212, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.679, 'eval_steps_per_second': 11.679, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.73s/it]

{'eval_loss': 1.7063590288162231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0887, 'eval_samples_per_second': 11.278, 'eval_steps_per_second': 11.278, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.99s/it]

{'eval_loss': 1.5179877281188965, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.362, 'eval_steps_per_second': 11.362, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.32s/it]


{'train_runtime': 9.956, 'train_samples_per_second': 2.109, 'train_steps_per_second': 0.301, 'train_loss': 1.988181432088216, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 153.61it/s]
100%|██████████| 1/1 [00:00<00:00, 142.80it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3503.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.85 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 608.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 181.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 181.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.108888864517212, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.1037, 'eval_samples_per_second': 9.644, 'eval_steps_per_second': 9.644, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.62s/it]

{'eval_loss': 1.7063590288162231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0877, 'eval_samples_per_second': 11.401, 'eval_steps_per_second': 11.401, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.97s/it]

{'eval_loss': 1.5179877281188965, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.151, 'eval_steps_per_second': 12.151, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.28s/it]


{'train_runtime': 9.8445, 'train_samples_per_second': 2.133, 'train_steps_per_second': 0.305, 'train_loss': 1.988181432088216, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 8008.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 570.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1120877265930176, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.559, 'eval_steps_per_second': 12.559, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.84s/it]

{'eval_loss': 1.7079079151153564, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0972, 'eval_samples_per_second': 10.291, 'eval_steps_per_second': 10.291, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:09<00:00,  3.14s/it]

{'eval_loss': 1.5207439661026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.098, 'eval_steps_per_second': 11.098, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.46s/it]


{'train_runtime': 10.3683, 'train_samples_per_second': 2.315, 'train_steps_per_second': 0.289, 'train_loss': 2.026966094970703, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.12it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 7989.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 570.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1120877265930176, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.4, 'eval_steps_per_second': 12.4, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.85s/it]

{'eval_loss': 1.7079079151153564, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.2459, 'eval_samples_per_second': 4.067, 'eval_steps_per_second': 4.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:09<00:00,  3.16s/it]

{'eval_loss': 1.5207439661026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.821, 'eval_steps_per_second': 11.821, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.46s/it]


{'train_runtime': 10.3831, 'train_samples_per_second': 2.311, 'train_steps_per_second': 0.289, 'train_loss': 2.026966094970703, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.10it/s]
100%|██████████| 1/1 [00:00<00:00, 166.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 3997.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1991.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.71 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 592.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1120877265930176, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.677, 'eval_steps_per_second': 11.677, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.88s/it]

{'eval_loss': 1.7079079151153564, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0947, 'eval_samples_per_second': 10.563, 'eval_steps_per_second': 10.563, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:09<00:00,  3.17s/it]

{'eval_loss': 1.5207439661026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0946, 'eval_samples_per_second': 10.569, 'eval_steps_per_second': 10.569, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.48s/it]


{'train_runtime': 10.4503, 'train_samples_per_second': 2.297, 'train_steps_per_second': 0.287, 'train_loss': 2.026966094970703, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.63it/s]
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 8000.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 551.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1120877265930176, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0947, 'eval_samples_per_second': 10.565, 'eval_steps_per_second': 10.565, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.85s/it]

{'eval_loss': 1.7079079151153564, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0976, 'eval_samples_per_second': 10.243, 'eval_steps_per_second': 10.243, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:09<00:00,  3.14s/it]

{'eval_loss': 1.5207439661026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.681, 'eval_steps_per_second': 10.681, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.46s/it]


{'train_runtime': 10.37, 'train_samples_per_second': 2.314, 'train_steps_per_second': 0.289, 'train_loss': 2.026966094970703, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 3999.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 592.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1120877265930176, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.89s/it]

{'eval_loss': 1.7079079151153564, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0947, 'eval_samples_per_second': 10.564, 'eval_steps_per_second': 10.564, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:09<00:00,  3.13s/it]

{'eval_loss': 1.5207439661026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.155, 'eval_steps_per_second': 11.155, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.46s/it]


{'train_runtime': 10.3775, 'train_samples_per_second': 2.313, 'train_steps_per_second': 0.289, 'train_loss': 2.026966094970703, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.61it/s]
100%|██████████| 1/1 [00:00<00:00, 133.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 9000.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 528.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.6331534385681152, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8174603174603174, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.216, 'eval_steps_per_second': 11.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.64s/it]

{'eval_loss': 1.08551824092865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0991, 'eval_samples_per_second': 10.086, 'eval_steps_per_second': 10.086, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.73s/it]

{'eval_loss': 0.9529902338981628, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.961, 'eval_steps_per_second': 11.961, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.04s/it]


{'train_runtime': 12.2095, 'train_samples_per_second': 2.211, 'train_steps_per_second': 0.491, 'train_loss': 1.5558730761210124, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 4486.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 427.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 1.6026866436004639, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.1077, 'eval_samples_per_second': 9.289, 'eval_steps_per_second': 9.289, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.67s/it]

{'eval_loss': 1.2240924835205078, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.098, 'eval_steps_per_second': 11.098, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.75s/it]

{'eval_loss': 1.0395073890686035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.817, 'eval_steps_per_second': 11.817, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


{'train_runtime': 12.3994, 'train_samples_per_second': 2.178, 'train_steps_per_second': 0.484, 'train_loss': 1.6379307111104329, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.60it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 3580.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 579.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.6026866436004639, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.69s/it]

{'eval_loss': 1.2240924835205078, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.476, 'eval_steps_per_second': 11.476, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.77s/it]

{'eval_loss': 1.0395073890686035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.889, 'eval_steps_per_second': 11.889, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


{'train_runtime': 12.4249, 'train_samples_per_second': 2.173, 'train_steps_per_second': 0.483, 'train_loss': 1.6379307111104329, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 133.10it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 4498.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 561.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.6026866436004639, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.414, 'eval_steps_per_second': 11.414, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.64s/it]

{'eval_loss': 1.2240924835205078, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0902, 'eval_samples_per_second': 11.091, 'eval_steps_per_second': 11.091, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.74s/it]

{'eval_loss': 1.0395073890686035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.61, 'eval_steps_per_second': 11.61, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.05s/it]


{'train_runtime': 12.2843, 'train_samples_per_second': 2.198, 'train_steps_per_second': 0.488, 'train_loss': 1.6379307111104329, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.37it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
100%|██████████| 1/1 [00:00<00:00, 142.67it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 5827.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 646.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.89 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 513.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.6026866436004639, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.61s/it]

{'eval_loss': 1.2240924835205078, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.542, 'eval_steps_per_second': 11.542, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.77s/it]

{'eval_loss': 1.0395073890686035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.677, 'eval_steps_per_second': 11.677, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


{'train_runtime': 12.4178, 'train_samples_per_second': 2.174, 'train_steps_per_second': 0.483, 'train_loss': 1.6379307111104329, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 133.03it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 4973.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 448.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5937988758087158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.72s/it]

{'eval_loss': 1.1999560594558716, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.608, 'eval_steps_per_second': 11.608, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.85s/it]

{'eval_loss': 1.0132163763046265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0922, 'eval_samples_per_second': 10.851, 'eval_steps_per_second': 10.851, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.15s/it]


{'train_runtime': 12.8789, 'train_samples_per_second': 2.329, 'train_steps_per_second': 0.466, 'train_loss': 1.6177867253621419, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 6631.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 499.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.5937988758087158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.94, 'eval_steps_per_second': 11.94, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.74s/it]

{'eval_loss': 1.1999560594558716, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.541, 'eval_steps_per_second': 11.541, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.84s/it]

{'eval_loss': 1.0132163763046265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.401, 'eval_steps_per_second': 12.401, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.14s/it]


{'train_runtime': 12.8341, 'train_samples_per_second': 2.338, 'train_steps_per_second': 0.468, 'train_loss': 1.6177867253621419, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 52.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.73it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 3983.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 443.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.5937988758087158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 11.611, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.74s/it]

{'eval_loss': 1.1999560594558716, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0942, 'eval_samples_per_second': 10.621, 'eval_steps_per_second': 10.621, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.85s/it]

{'eval_loss': 1.0132163763046265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.794, 'eval_steps_per_second': 10.794, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.16s/it]


{'train_runtime': 12.9644, 'train_samples_per_second': 2.314, 'train_steps_per_second': 0.463, 'train_loss': 1.6177867253621419, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 3992.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 454.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5937988758087158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.76s/it]

{'eval_loss': 1.1999560594558716, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0887, 'eval_samples_per_second': 11.276, 'eval_steps_per_second': 11.276, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.85s/it]

{'eval_loss': 1.0132163763046265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.216, 'eval_steps_per_second': 11.216, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.18s/it]


{'train_runtime': 13.0698, 'train_samples_per_second': 2.295, 'train_steps_per_second': 0.459, 'train_loss': 1.6177867253621419, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.00it/s]
100%|██████████| 1/1 [00:00<00:00, 142.30it/s]
100%|██████████| 1/1 [00:00<00:00, 153.41it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 9995.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5937988758087158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.719, 'eval_steps_per_second': 12.719, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.74s/it]

{'eval_loss': 1.1999560594558716, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0922, 'eval_samples_per_second': 10.85, 'eval_steps_per_second': 10.85, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.85s/it]

{'eval_loss': 1.0132163763046265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.516, 'eval_steps_per_second': 11.516, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.17s/it]


{'train_runtime': 12.9948, 'train_samples_per_second': 2.309, 'train_steps_per_second': 0.462, 'train_loss': 1.6177867253621419, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 142.63it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 4383.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.90 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 439.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 242.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6002179384231567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.683, 'eval_steps_per_second': 11.683, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.80s/it]

{'eval_loss': 1.1864615678787231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.221, 'eval_steps_per_second': 11.221, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.96s/it]

{'eval_loss': 1.0188997983932495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.596, 'eval_steps_per_second': 11.596, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


{'train_runtime': 13.5175, 'train_samples_per_second': 2.441, 'train_steps_per_second': 0.444, 'train_loss': 1.615603764851888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.22it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.35it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 4384.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 535.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6002179384231567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0987, 'eval_samples_per_second': 10.135, 'eval_steps_per_second': 10.135, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.87s/it]

{'eval_loss': 1.1864615678787231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0927, 'eval_samples_per_second': 10.791, 'eval_steps_per_second': 10.791, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.02s/it]

{'eval_loss': 1.0188997983932495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.609, 'eval_steps_per_second': 11.609, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.33s/it]


{'train_runtime': 13.9581, 'train_samples_per_second': 2.364, 'train_steps_per_second': 0.43, 'train_loss': 1.615603764851888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.88it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
100%|██████████| 1/1 [00:00<00:00, 133.06it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 5499.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 446.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6002179384231567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.736, 'eval_steps_per_second': 10.736, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.90s/it]

{'eval_loss': 1.1864615678787231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.477, 'eval_steps_per_second': 11.477, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.06s/it]

{'eval_loss': 1.0188997983932495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0967, 'eval_samples_per_second': 10.346, 'eval_steps_per_second': 10.346, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.36s/it]


{'train_runtime': 14.1514, 'train_samples_per_second': 2.332, 'train_steps_per_second': 0.424, 'train_loss': 1.615603764851888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
100%|██████████| 1/1 [00:00<00:00, 166.26it/s]
100%|██████████| 1/1 [00:00<00:00, 180.51it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 11011.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 448.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6002179384231567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.815, 'eval_steps_per_second': 11.815, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.85s/it]

{'eval_loss': 1.1864615678787231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.099, 'eval_steps_per_second': 12.099, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.95s/it]

{'eval_loss': 1.0188997983932495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.093, 'eval_steps_per_second': 11.093, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.26s/it]


{'train_runtime': 13.5364, 'train_samples_per_second': 2.438, 'train_steps_per_second': 0.443, 'train_loss': 1.615603764851888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 153.42it/s]
100%|██████████| 1/1 [00:00<00:00, 153.46it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 5502.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 467.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6002179384231567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.885, 'eval_steps_per_second': 11.885, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.92s/it]

{'eval_loss': 1.1864615678787231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.215, 'eval_steps_per_second': 11.215, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.99s/it]

{'eval_loss': 1.0188997983932495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.717, 'eval_steps_per_second': 12.717, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.32s/it]


{'train_runtime': 13.8998, 'train_samples_per_second': 2.374, 'train_steps_per_second': 0.432, 'train_loss': 1.615603764851888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 142.49it/s]
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 6001.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 420.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 242.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6069614887237549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.748, 'eval_steps_per_second': 11.748, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:03,  1.96s/it]

{'eval_loss': 1.21684730052948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.886, 'eval_steps_per_second': 11.886, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.10s/it]

{'eval_loss': 1.0604244470596313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.327, 'eval_steps_per_second': 12.327, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.37s/it]


{'train_runtime': 14.1946, 'train_samples_per_second': 2.536, 'train_steps_per_second': 0.423, 'train_loss': 1.6193604469299316, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
100%|██████████| 1/1 [00:00<00:00, 166.33it/s]
100%|██████████| 1/1 [00:00<00:00, 153.66it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 6000.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 499.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6069614887237549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.1052, 'eval_samples_per_second': 9.508, 'eval_steps_per_second': 9.508, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:03,  2.00s/it]

{'eval_loss': 1.21684730052948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.35, 'eval_steps_per_second': 11.35, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.14s/it]

{'eval_loss': 1.0604244470596313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.093, 'eval_steps_per_second': 11.093, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.40s/it]


{'train_runtime': 14.4081, 'train_samples_per_second': 2.499, 'train_steps_per_second': 0.416, 'train_loss': 1.6193604469299316, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 12000.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 443.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6069614887237549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.478, 'eval_steps_per_second': 11.478, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.10s/it]

{'eval_loss': 1.21684730052948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.215, 'eval_steps_per_second': 11.215, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.16s/it]

{'eval_loss': 1.0604244470596313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0937, 'eval_samples_per_second': 10.676, 'eval_steps_per_second': 10.676, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.46s/it]


{'train_runtime': 14.7565, 'train_samples_per_second': 2.44, 'train_steps_per_second': 0.407, 'train_loss': 1.6193604469299316, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 181.44it/s]
100%|██████████| 1/1 [00:00<00:00, 133.25it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 11989.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 460.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 240.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6069614887237549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.345, 'eval_steps_per_second': 11.345, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:03,  1.98s/it]

{'eval_loss': 1.21684730052948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0897, 'eval_samples_per_second': 11.151, 'eval_steps_per_second': 11.151, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.14s/it]

{'eval_loss': 1.0604244470596313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0932, 'eval_samples_per_second': 10.735, 'eval_steps_per_second': 10.735, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.42s/it]


{'train_runtime': 14.5144, 'train_samples_per_second': 2.48, 'train_steps_per_second': 0.413, 'train_loss': 1.6193604469299316, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 11966.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 444.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6069614887237549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.614, 'eval_steps_per_second': 11.614, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.92s/it]

{'eval_loss': 1.21684730052948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.394, 'eval_steps_per_second': 12.394, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.03s/it]

{'eval_loss': 1.0604244470596313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.33, 'eval_steps_per_second': 12.33, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.31s/it]


{'train_runtime': 13.8434, 'train_samples_per_second': 2.601, 'train_steps_per_second': 0.433, 'train_loss': 1.6193604469299316, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 151.01it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 13004.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.55 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 425.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6104252338409424, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.326, 'eval_steps_per_second': 12.326, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.02s/it]

{'eval_loss': 1.1794549226760864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.103, 'eval_steps_per_second': 12.103, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.09s/it]

{'eval_loss': 1.0068479776382446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.482, 'eval_steps_per_second': 11.482, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.37s/it]


{'train_runtime': 14.2349, 'train_samples_per_second': 2.74, 'train_steps_per_second': 0.421, 'train_loss': 1.589874267578125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 133.27it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6477.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 463.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6104252338409424, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.678, 'eval_steps_per_second': 11.678, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.05s/it]

{'eval_loss': 1.1794549226760864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.533, 'eval_steps_per_second': 11.533, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.12s/it]

{'eval_loss': 1.0068479776382446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.748, 'eval_steps_per_second': 11.748, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.39s/it]


{'train_runtime': 14.3478, 'train_samples_per_second': 2.718, 'train_steps_per_second': 0.418, 'train_loss': 1.589874267578125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6495.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 666.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 490.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6104252338409424, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0946, 'eval_samples_per_second': 10.567, 'eval_steps_per_second': 10.567, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.00s/it]

{'eval_loss': 1.1794549226760864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.08s/it]

{'eval_loss': 1.0068479776382446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.803, 'eval_steps_per_second': 12.803, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.37s/it]


{'train_runtime': 14.2327, 'train_samples_per_second': 2.74, 'train_steps_per_second': 0.422, 'train_loss': 1.589874267578125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 8656.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1985.00 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 490.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6104252338409424, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.04s/it]

{'eval_loss': 1.1794549226760864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.723, 'eval_steps_per_second': 12.723, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.09s/it]

{'eval_loss': 1.0068479776382446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.102, 'eval_steps_per_second': 12.102, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.37s/it]


{'train_runtime': 14.2002, 'train_samples_per_second': 2.746, 'train_steps_per_second': 0.423, 'train_loss': 1.589874267578125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 12951.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 472.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6104252338409424, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.752, 'eval_steps_per_second': 11.752, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.05s/it]

{'eval_loss': 1.1794549226760864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.678, 'eval_steps_per_second': 11.678, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.13s/it]

{'eval_loss': 1.0068479776382446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.037, 'eval_steps_per_second': 12.037, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.42s/it]


{'train_runtime': 14.5303, 'train_samples_per_second': 2.684, 'train_steps_per_second': 0.413, 'train_loss': 1.589874267578125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.43it/s]
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 13984.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 466.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6106680631637573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.961, 'eval_steps_per_second': 11.961, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.09s/it]

{'eval_loss': 1.169499158859253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.349, 'eval_steps_per_second': 11.349, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.23s/it]

{'eval_loss': 0.9434428215026855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.179, 'eval_steps_per_second': 12.179, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.47s/it]


{'train_runtime': 14.8009, 'train_samples_per_second': 2.838, 'train_steps_per_second': 0.405, 'train_loss': 1.5910181999206543, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 181.34it/s]
100%|██████████| 1/1 [00:00<00:00, 166.26it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 13997.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 490.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.6106680631637573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.08s/it]

{'eval_loss': 1.169499158859253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.179, 'eval_steps_per_second': 12.179, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.18s/it]

{'eval_loss': 0.9434428215026855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.475, 'eval_steps_per_second': 11.475, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.43s/it]


{'train_runtime': 14.5892, 'train_samples_per_second': 2.879, 'train_steps_per_second': 0.411, 'train_loss': 1.5910181999206543, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 153.44it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 6983.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 548.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6106680631637573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.172, 'eval_steps_per_second': 12.172, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.17s/it]

{'eval_loss': 1.169499158859253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.094, 'eval_steps_per_second': 11.094, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.23s/it]

{'eval_loss': 0.9434428215026855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.315, 'eval_steps_per_second': 12.315, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.48s/it]


{'train_runtime': 14.8514, 'train_samples_per_second': 2.828, 'train_steps_per_second': 0.404, 'train_loss': 1.5910181999206543, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.45it/s]
100%|██████████| 1/1 [00:00<00:00, 153.66it/s]
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 7000.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 450.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6106680631637573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0787, 'eval_samples_per_second': 12.712, 'eval_steps_per_second': 12.712, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.13s/it]

{'eval_loss': 1.169499158859253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.177, 'eval_steps_per_second': 12.177, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.27s/it]

{'eval_loss': 0.9434428215026855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.106, 'eval_steps_per_second': 12.106, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.51s/it]


{'train_runtime': 15.0318, 'train_samples_per_second': 2.794, 'train_steps_per_second': 0.399, 'train_loss': 1.5910181999206543, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.86it/s]
100%|██████████| 1/1 [00:00<00:00, 181.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 5590.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.73 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 474.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6106680631637573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.253, 'eval_steps_per_second': 12.253, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.10s/it]

{'eval_loss': 1.169499158859253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.23s/it]

{'eval_loss': 0.9434428215026855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.484, 'eval_steps_per_second': 12.484, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.46s/it]


{'train_runtime': 14.781, 'train_samples_per_second': 2.841, 'train_steps_per_second': 0.406, 'train_loss': 1.5910181999206543, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.80it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 7503.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 544.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.607651948928833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.034, 'eval_steps_per_second': 12.034, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.23s/it]

{'eval_loss': 1.1683000326156616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0952, 'eval_samples_per_second': 10.509, 'eval_steps_per_second': 10.509, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.39s/it]

{'eval_loss': 0.9785789251327515, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.409, 'eval_steps_per_second': 12.409, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.61s/it]


{'train_runtime': 15.6567, 'train_samples_per_second': 2.874, 'train_steps_per_second': 0.383, 'train_loss': 1.593471844991048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 181.52it/s]
100%|██████████| 1/1 [00:00<00:00, 152.40it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 7499.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 491.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.607651948928833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.255, 'eval_steps_per_second': 12.255, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.20s/it]

{'eval_loss': 1.1683000326156616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.255, 'eval_steps_per_second': 12.255, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.33s/it]

{'eval_loss': 0.9785789251327515, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.746, 'eval_steps_per_second': 11.746, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.55s/it]


{'train_runtime': 15.2815, 'train_samples_per_second': 2.945, 'train_steps_per_second': 0.393, 'train_loss': 1.593471844991048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.75it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 7500.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 461.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.607651948928833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.2554, 'eval_samples_per_second': 3.916, 'eval_steps_per_second': 3.916, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.22s/it]

{'eval_loss': 1.1683000326156616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.327, 'eval_steps_per_second': 12.327, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.32s/it]

{'eval_loss': 0.9785789251327515, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.257, 'eval_steps_per_second': 12.257, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.55s/it]


{'train_runtime': 15.3098, 'train_samples_per_second': 2.939, 'train_steps_per_second': 0.392, 'train_loss': 1.593471844991048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.80it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 9986.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.85 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 491.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.607651948928833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.116, 'eval_samples_per_second': 8.619, 'eval_steps_per_second': 8.619, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.27s/it]

{'eval_loss': 1.1683000326156616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.028, 'eval_steps_per_second': 12.028, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.34s/it]

{'eval_loss': 0.9785789251327515, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.59s/it]


{'train_runtime': 15.564, 'train_samples_per_second': 2.891, 'train_steps_per_second': 0.386, 'train_loss': 1.593471844991048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.71it/s]
100%|██████████| 1/1 [00:00<00:00, 133.33it/s]
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 7485.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 491.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 1.607651948928833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.031, 'eval_steps_per_second': 12.031, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.24s/it]

{'eval_loss': 1.1683000326156616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.253, 'eval_steps_per_second': 12.253, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.38s/it]

{'eval_loss': 0.9785789251327515, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.484, 'eval_steps_per_second': 12.484, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.60s/it]


{'train_runtime': 15.6159, 'train_samples_per_second': 2.882, 'train_steps_per_second': 0.384, 'train_loss': 1.593471844991048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.30it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 7999.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 524.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.602946162223816, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.643, 'eval_steps_per_second': 12.643, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.33s/it]

{'eval_loss': 1.1550288200378418, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.407, 'eval_steps_per_second': 12.407, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.45s/it]

{'eval_loss': 0.9333235621452332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.528, 'eval_steps_per_second': 11.528, 'epoch': 3.0}


100%|██████████| 6/6 [00:16<00:00,  2.67s/it]


{'train_runtime': 16.0235, 'train_samples_per_second': 2.996, 'train_steps_per_second': 0.374, 'train_loss': 1.5888808568318684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.85it/s]
100%|██████████| 1/1 [00:00<00:00, 133.23it/s]
100%|██████████| 1/1 [00:00<00:00, 181.53it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 10588.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 409.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 181.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.602946162223816, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.47s/it]

{'eval_loss': 1.1550288200378418, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.854, 'eval_steps_per_second': 10.854, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:15<00:00,  2.53s/it]

{'eval_loss': 0.9333235621452332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0947, 'eval_samples_per_second': 10.562, 'eval_steps_per_second': 10.562, 'epoch': 3.0}


100%|██████████| 6/6 [00:16<00:00,  2.75s/it]


{'train_runtime': 16.4857, 'train_samples_per_second': 2.912, 'train_steps_per_second': 0.364, 'train_loss': 1.5888808568318684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 166.26it/s]
100%|██████████| 1/1 [00:00<00:00, 166.34it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 7992.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 469.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.602946162223816, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.766, 'eval_steps_per_second': 11.766, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.32s/it]

{'eval_loss': 1.1550288200378418, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.346, 'eval_steps_per_second': 11.346, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.48s/it]

{'eval_loss': 0.9333235621452332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 3.0}


100%|██████████| 6/6 [00:16<00:00,  2.67s/it]


{'train_runtime': 16.0265, 'train_samples_per_second': 2.995, 'train_steps_per_second': 0.374, 'train_loss': 1.5888808568318684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 15997.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.39 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 456.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.602946162223816, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.542, 'eval_steps_per_second': 11.542, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.35s/it]

{'eval_loss': 1.1550288200378418, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.744, 'eval_steps_per_second': 11.744, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.51s/it]

{'eval_loss': 0.9333235621452332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.746, 'eval_steps_per_second': 11.746, 'epoch': 3.0}


100%|██████████| 6/6 [00:16<00:00,  2.69s/it]


{'train_runtime': 16.1547, 'train_samples_per_second': 2.971, 'train_steps_per_second': 0.371, 'train_loss': 1.5888808568318684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 7991.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.23 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 477.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.602946162223816, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.313, 'eval_steps_per_second': 12.313, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.33s/it]

{'eval_loss': 1.1550288200378418, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.813, 'eval_steps_per_second': 11.813, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.42s/it]

{'eval_loss': 0.9333235621452332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.744, 'eval_steps_per_second': 11.744, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.64s/it]


{'train_runtime': 15.85, 'train_samples_per_second': 3.028, 'train_steps_per_second': 0.379, 'train_loss': 1.5888808568318684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.54it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 17001.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 485.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2975115776062012, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0951, 'eval_samples_per_second': 10.511, 'eval_steps_per_second': 10.511, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.67s/it]

{'eval_loss': 0.6981537938117981, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.73, 'eval_steps_per_second': 11.73, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.70s/it]

{'eval_loss': 0.6631708145141602, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.19, 'eval_steps_per_second': 12.19, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.05s/it]


{'train_runtime': 18.4222, 'train_samples_per_second': 2.768, 'train_steps_per_second': 0.489, 'train_loss': 1.3885616726345487, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.73it/s]
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 6782.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.85 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 452.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2130427360534668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.68s/it]

{'eval_loss': 0.8801873326301575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.104, 'eval_steps_per_second': 12.104, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.70s/it]

{'eval_loss': 0.8948651552200317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.737, 'eval_steps_per_second': 10.737, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]


{'train_runtime': 18.7444, 'train_samples_per_second': 2.721, 'train_steps_per_second': 0.48, 'train_loss': 1.324554443359375, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 11250.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.20 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 485.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2130427360534668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.68, 'eval_steps_per_second': 11.68, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.66s/it]

{'eval_loss': 0.8801873326301575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.178, 'eval_steps_per_second': 12.178, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.76s/it]

{'eval_loss': 0.8948651552200317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0916, 'eval_samples_per_second': 10.918, 'eval_steps_per_second': 10.918, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]


{'train_runtime': 18.7433, 'train_samples_per_second': 2.721, 'train_steps_per_second': 0.48, 'train_loss': 1.324554443359375, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
100%|██████████| 1/1 [00:00<00:00, 166.47it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8480.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 446.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2130427360534668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.67s/it]

{'eval_loss': 0.8801873326301575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.25, 'eval_steps_per_second': 12.25, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.70s/it]

{'eval_loss': 0.8948651552200317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.05s/it]


{'train_runtime': 18.4924, 'train_samples_per_second': 2.758, 'train_steps_per_second': 0.487, 'train_loss': 1.324554443359375, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.75it/s]
100%|██████████| 1/1 [00:00<00:00, 153.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.54it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 16980.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.55 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 446.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 307.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2130427360534668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.678, 'eval_steps_per_second': 11.678, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.67s/it]

{'eval_loss': 0.8801873326301575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.748, 'eval_steps_per_second': 11.748, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.74s/it]

{'eval_loss': 0.8948651552200317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0981, 'eval_samples_per_second': 10.19, 'eval_steps_per_second': 10.19, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.07s/it]


{'train_runtime': 18.6288, 'train_samples_per_second': 2.738, 'train_steps_per_second': 0.483, 'train_loss': 1.324554443359375, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 142.46it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8987.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 408.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.2079145908355713, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.39, 'eval_steps_per_second': 12.39, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:05,  1.69s/it]

{'eval_loss': 0.878728449344635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.96, 'eval_steps_per_second': 11.96, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.75s/it]

{'eval_loss': 0.8953295350074768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.82, 'eval_steps_per_second': 11.82, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.05s/it]


{'train_runtime': 18.4615, 'train_samples_per_second': 2.925, 'train_steps_per_second': 0.488, 'train_loss': 1.321828842163086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.63it/s]
100%|██████████| 1/1 [00:00<00:00, 153.75it/s]
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8995.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.02 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 454.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.2079145908355713, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0956, 'eval_samples_per_second': 10.458, 'eval_steps_per_second': 10.458, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.71s/it]

{'eval_loss': 0.878728449344635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.411, 'eval_steps_per_second': 11.411, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.76s/it]

{'eval_loss': 0.8953295350074768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.558, 'eval_steps_per_second': 12.558, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]


{'train_runtime': 18.7299, 'train_samples_per_second': 2.883, 'train_steps_per_second': 0.481, 'train_loss': 1.321828842163086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.79it/s]
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
100%|██████████| 1/1 [00:00<00:00, 142.67it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8989.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 845.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 460.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2079145908355713, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.74s/it]

{'eval_loss': 0.878728449344635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.103, 'eval_steps_per_second': 12.103, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.76s/it]

{'eval_loss': 0.8953295350074768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.102, 'eval_steps_per_second': 11.102, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]


{'train_runtime': 18.7407, 'train_samples_per_second': 2.881, 'train_steps_per_second': 0.48, 'train_loss': 1.321828842163086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 124.86it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 17992.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 399.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.2079145908355713, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.72s/it]

{'eval_loss': 0.878728449344635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.818, 'eval_steps_per_second': 11.818, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.77s/it]

{'eval_loss': 0.8953295350074768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.101, 'eval_steps_per_second': 12.101, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.07s/it]


{'train_runtime': 18.6194, 'train_samples_per_second': 2.9, 'train_steps_per_second': 0.483, 'train_loss': 1.321828842163086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8992.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 432.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2079145908355713, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.74s/it]

{'eval_loss': 0.878728449344635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.545, 'eval_steps_per_second': 11.545, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.76s/it]

{'eval_loss': 0.8953295350074768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.954, 'eval_steps_per_second': 11.954, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.10s/it]


{'train_runtime': 18.9155, 'train_samples_per_second': 2.855, 'train_steps_per_second': 0.476, 'train_loss': 1.321828842163086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.44it/s]
100%|██████████| 1/1 [00:00<00:00, 166.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 6334.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 441.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2049435377120972, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1341, 'eval_samples_per_second': 7.455, 'eval_steps_per_second': 7.455, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.81s/it]

{'eval_loss': 0.8786906003952026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.963, 'eval_steps_per_second': 11.963, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.89s/it]

{'eval_loss': 0.8950574398040771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1257, 'eval_samples_per_second': 7.955, 'eval_steps_per_second': 7.955, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.17s/it]


{'train_runtime': 19.4884, 'train_samples_per_second': 2.925, 'train_steps_per_second': 0.462, 'train_loss': 1.3181480831570096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.80it/s]
100%|██████████| 1/1 [00:00<00:00, 161.90it/s]
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 18978.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.71 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 446.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2049435377120972, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.557, 'eval_steps_per_second': 12.557, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.76s/it]

{'eval_loss': 0.8786906003952026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.885, 'eval_steps_per_second': 11.885, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.83s/it]

{'eval_loss': 0.8950574398040771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.101, 'eval_steps_per_second': 12.101, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.11s/it]


{'train_runtime': 19.0246, 'train_samples_per_second': 2.996, 'train_steps_per_second': 0.473, 'train_loss': 1.3181480831570096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 138.09it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 9492.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.91 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 421.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.2049435377120972, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.158, 'eval_steps_per_second': 11.158, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.83s/it]

{'eval_loss': 0.8786906003952026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1032, 'eval_samples_per_second': 9.691, 'eval_steps_per_second': 9.691, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.84s/it]

{'eval_loss': 0.8950574398040771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.14s/it]


{'train_runtime': 19.2917, 'train_samples_per_second': 2.955, 'train_steps_per_second': 0.467, 'train_loss': 1.3181480831570096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.42it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 19001.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 479.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 498.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2049435377120972, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0937, 'eval_samples_per_second': 10.677, 'eval_steps_per_second': 10.677, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.76s/it]

{'eval_loss': 0.8786906003952026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.963, 'eval_steps_per_second': 11.963, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.79s/it]

{'eval_loss': 0.8950574398040771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.019, 'eval_steps_per_second': 12.019, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]


{'train_runtime': 18.7275, 'train_samples_per_second': 3.044, 'train_steps_per_second': 0.481, 'train_loss': 1.3181480831570096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.45it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 166.54it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 18978.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.60 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 436.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2049435377120972, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.819, 'eval_steps_per_second': 11.819, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.83s/it]

{'eval_loss': 0.8786906003952026, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.798, 'eval_steps_per_second': 10.798, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.91s/it]

{'eval_loss': 0.8950574398040771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.25, 'eval_steps_per_second': 12.25, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.17s/it]


{'train_runtime': 19.5341, 'train_samples_per_second': 2.918, 'train_steps_per_second': 0.461, 'train_loss': 1.3181480831570096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 142.94it/s]
100%|██████████| 1/1 [00:00<00:00, 142.71it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20030.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.45 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 429.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2013012170791626, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.91s/it]

{'eval_loss': 0.8751160502433777, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0971, 'eval_samples_per_second': 10.295, 'eval_steps_per_second': 10.295, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.06s/it]

{'eval_loss': 0.8917906284332275, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.099, 'eval_steps_per_second': 12.099, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.28s/it]


{'train_runtime': 20.5056, 'train_samples_per_second': 2.926, 'train_steps_per_second': 0.439, 'train_loss': 1.3193510903252497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.16it/s]
100%|██████████| 1/1 [00:00<00:00, 162.00it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 19906.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.67 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 403.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 297.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2013012170791626, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.605, 'eval_steps_per_second': 11.605, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.92s/it]

{'eval_loss': 0.8751160502433777, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.354, 'eval_steps_per_second': 11.354, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.93s/it]

{'eval_loss': 0.8917906284332275, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.744, 'eval_steps_per_second': 11.744, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.24s/it]


{'train_runtime': 20.1466, 'train_samples_per_second': 2.978, 'train_steps_per_second': 0.447, 'train_loss': 1.3193510903252497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20006.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 459.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2013012170791626, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.958, 'eval_steps_per_second': 11.958, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.85s/it]

{'eval_loss': 0.8751160502433777, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.64, 'eval_steps_per_second': 12.64, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.90s/it]

{'eval_loss': 0.8917906284332275, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0771, 'eval_samples_per_second': 12.967, 'eval_steps_per_second': 12.967, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.17s/it]


{'train_runtime': 19.5673, 'train_samples_per_second': 3.066, 'train_steps_per_second': 0.46, 'train_loss': 1.3193510903252497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 153.82it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 19987.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.85 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 253.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2013012170791626, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.222, 'eval_steps_per_second': 11.222, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.88s/it]

{'eval_loss': 0.8751160502433777, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0771, 'eval_samples_per_second': 12.967, 'eval_steps_per_second': 12.967, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.94s/it]

{'eval_loss': 0.8917906284332275, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1007, 'eval_samples_per_second': 9.932, 'eval_steps_per_second': 9.932, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.21s/it]


{'train_runtime': 19.9284, 'train_samples_per_second': 3.011, 'train_steps_per_second': 0.452, 'train_loss': 1.3193510903252497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 162.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.13it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 19968.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 459.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 288.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.2013012170791626, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.676, 'eval_steps_per_second': 11.676, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.86s/it]

{'eval_loss': 0.8751160502433777, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.748, 'eval_steps_per_second': 11.748, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.91s/it]

{'eval_loss': 0.8917906284332275, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.20s/it]


{'train_runtime': 19.7583, 'train_samples_per_second': 3.037, 'train_steps_per_second': 0.456, 'train_loss': 1.3193510903252497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.10it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20941.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 466.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 1.2011516094207764, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1067, 'eval_samples_per_second': 9.376, 'eval_steps_per_second': 9.376, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.95s/it]

{'eval_loss': 0.874758243560791, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 11.611, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.99s/it]

{'eval_loss': 0.8916106224060059, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2799, 'train_samples_per_second': 3.107, 'train_steps_per_second': 0.444, 'train_loss': 1.3226619296603732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.16it/s]
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 133.21it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20966.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 456.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2011516094207764, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.563, 'eval_steps_per_second': 12.563, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.96s/it]

{'eval_loss': 0.874758243560791, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.00s/it]

{'eval_loss': 0.8916106224060059, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0873, 'eval_samples_per_second': 11.454, 'eval_steps_per_second': 11.454, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.27s/it]


{'train_runtime': 20.3987, 'train_samples_per_second': 3.088, 'train_steps_per_second': 0.441, 'train_loss': 1.3226619296603732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.37it/s]
100%|██████████| 1/1 [00:00<00:00, 142.75it/s]
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20981.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.23 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 423.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2011516094207764, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.482, 'eval_steps_per_second': 12.482, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.97s/it]

{'eval_loss': 0.874758243560791, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.544, 'eval_steps_per_second': 11.544, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.06s/it]

{'eval_loss': 0.8916106224060059, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0877, 'eval_samples_per_second': 11.408, 'eval_steps_per_second': 11.408, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.30s/it]


{'train_runtime': 20.7205, 'train_samples_per_second': 3.04, 'train_steps_per_second': 0.434, 'train_loss': 1.3226619296603732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10489.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.49 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 451.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 310.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2011516094207764, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.817, 'eval_steps_per_second': 11.817, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.98s/it]

{'eval_loss': 0.874758243560791, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.819, 'eval_steps_per_second': 11.819, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.00s/it]

{'eval_loss': 0.8916106224060059, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.25, 'eval_steps_per_second': 12.25, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.27s/it]


{'train_runtime': 20.4694, 'train_samples_per_second': 3.078, 'train_steps_per_second': 0.44, 'train_loss': 1.3226619296603732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.03it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20971.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 460.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2011516094207764, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.338, 'eval_steps_per_second': 11.338, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.92s/it]

{'eval_loss': 0.874758243560791, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.596, 'eval_steps_per_second': 11.596, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.00s/it]

{'eval_loss': 0.8916106224060059, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.101, 'eval_steps_per_second': 12.101, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2453, 'train_samples_per_second': 3.112, 'train_steps_per_second': 0.445, 'train_loss': 1.3226619296603732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.23it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.61it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 21996.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 453.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2006571292877197, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.801, 'eval_steps_per_second': 12.801, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.00s/it]

{'eval_loss': 0.8748136758804321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0792, 'eval_samples_per_second': 12.626, 'eval_steps_per_second': 12.626, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.08s/it]

{'eval_loss': 0.8899617791175842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0951, 'eval_samples_per_second': 10.515, 'eval_steps_per_second': 10.515, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.31s/it]


{'train_runtime': 20.773, 'train_samples_per_second': 3.177, 'train_steps_per_second': 0.433, 'train_loss': 1.3192229800754123, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.04it/s]
100%|██████████| 1/1 [00:00<00:00, 142.63it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 10989.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 418.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 243.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2006571292877197, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.955, 'eval_steps_per_second': 11.955, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.10s/it]

{'eval_loss': 0.8748136758804321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.344, 'eval_steps_per_second': 11.344, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.13s/it]

{'eval_loss': 0.8899617791175842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.478, 'eval_steps_per_second': 11.478, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.37s/it]


{'train_runtime': 21.2953, 'train_samples_per_second': 3.099, 'train_steps_per_second': 0.423, 'train_loss': 1.3192229800754123, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
100%|██████████| 1/1 [00:00<00:00, 166.37it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 21902.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 435.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.2006571292877197, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.226, 'eval_steps_per_second': 11.226, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.04s/it]

{'eval_loss': 0.8748136758804321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.746, 'eval_steps_per_second': 11.746, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.14s/it]

{'eval_loss': 0.8899617791175842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.287, 'eval_steps_per_second': 11.287, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.34s/it]


{'train_runtime': 21.0621, 'train_samples_per_second': 3.134, 'train_steps_per_second': 0.427, 'train_loss': 1.3192229800754123, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 200.04it/s]
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 21917.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.87 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 403.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.2006571292877197, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.683, 'eval_steps_per_second': 10.683, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.99s/it]

{'eval_loss': 0.8748136758804321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.485, 'eval_steps_per_second': 12.485, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.06s/it]

{'eval_loss': 0.8899617791175842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.417, 'eval_steps_per_second': 11.417, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.29s/it]


{'train_runtime': 20.5955, 'train_samples_per_second': 3.205, 'train_steps_per_second': 0.437, 'train_loss': 1.3192229800754123, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 21996.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 414.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.2006571292877197, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1121, 'eval_samples_per_second': 8.919, 'eval_steps_per_second': 8.919, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.03s/it]

{'eval_loss': 0.8748136758804321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.351, 'eval_steps_per_second': 11.351, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.05s/it]

{'eval_loss': 0.8899617791175842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.973, 'eval_steps_per_second': 10.973, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.30s/it]


{'train_runtime': 20.6614, 'train_samples_per_second': 3.194, 'train_steps_per_second': 0.436, 'train_loss': 1.3192229800754123, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 142.62it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11499.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 410.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.199655532836914, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.3, 'eval_steps_per_second': 11.3, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.15s/it]

{'eval_loss': 0.8724716305732727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.718, 'eval_steps_per_second': 12.718, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.19s/it]

{'eval_loss': 0.8873103260993958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.035, 'eval_steps_per_second': 11.035, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.40s/it]


{'train_runtime': 21.5646, 'train_samples_per_second': 3.2, 'train_steps_per_second': 0.417, 'train_loss': 1.317742771572537, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 166.50it/s]
100%|██████████| 1/1 [00:00<00:00, 153.69it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11509.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 421.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.199655532836914, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0996, 'eval_samples_per_second': 10.036, 'eval_steps_per_second': 10.036, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.15s/it]

{'eval_loss': 0.8724716305732727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.694, 'eval_steps_per_second': 11.694, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.16s/it]

{'eval_loss': 0.8873103260993958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.989, 'eval_steps_per_second': 10.989, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.40s/it]


{'train_runtime': 21.6012, 'train_samples_per_second': 3.194, 'train_steps_per_second': 0.417, 'train_loss': 1.317742771572537, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.72it/s]
100%|██████████| 1/1 [00:00<00:00, 117.47it/s]
100%|██████████| 1/1 [00:00<00:00, 133.12it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11489.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.19 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 375.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.199655532836914, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.482, 'eval_steps_per_second': 11.482, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.12s/it]

{'eval_loss': 0.8724716305732727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0792, 'eval_samples_per_second': 12.624, 'eval_steps_per_second': 12.624, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.26s/it]

{'eval_loss': 0.8873103260993958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.854, 'eval_steps_per_second': 10.854, 'epoch': 3.0}


100%|██████████| 9/9 [00:22<00:00,  2.44s/it]


{'train_runtime': 22.0001, 'train_samples_per_second': 3.136, 'train_steps_per_second': 0.409, 'train_loss': 1.317742771572537, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
100%|██████████| 1/1 [00:00<00:00, 199.89it/s]
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 22952.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1991.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 441.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.199655532836914, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.822, 'eval_steps_per_second': 11.822, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.14s/it]

{'eval_loss': 0.8724716305732727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.478, 'eval_steps_per_second': 11.478, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.22s/it]

{'eval_loss': 0.8873103260993958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.746, 'eval_steps_per_second': 11.746, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.43s/it]


{'train_runtime': 21.8635, 'train_samples_per_second': 3.156, 'train_steps_per_second': 0.412, 'train_loss': 1.317742771572537, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 181.82it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11480.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 430.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.199655532836914, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0916, 'eval_samples_per_second': 10.913, 'eval_steps_per_second': 10.913, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.19s/it]

{'eval_loss': 0.8724716305732727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.24s/it]

{'eval_loss': 0.8873103260993958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.891, 'eval_steps_per_second': 11.891, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.44s/it]


{'train_runtime': 21.9568, 'train_samples_per_second': 3.143, 'train_steps_per_second': 0.41, 'train_loss': 1.317742771572537, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
100%|██████████| 1/1 [00:00<00:00, 133.06it/s]
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 9581.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 409.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.1994261741638184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1087, 'eval_samples_per_second': 9.204, 'eval_steps_per_second': 9.204, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.21s/it]

{'eval_loss': 0.8728505969047546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.28s/it]

{'eval_loss': 0.8881044387817383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.481, 'eval_steps_per_second': 11.481, 'epoch': 3.0}


100%|██████████| 9/9 [00:22<00:00,  2.46s/it]


{'train_runtime': 22.1537, 'train_samples_per_second': 3.25, 'train_steps_per_second': 0.406, 'train_loss': 1.318253517150879, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.84it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 142.74it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 12002.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.76 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 399.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 322.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.1994261741638184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.645, 'eval_steps_per_second': 12.645, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.26s/it]

{'eval_loss': 0.8728505969047546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.623, 'eval_steps_per_second': 10.623, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:21<00:00,  2.31s/it]

{'eval_loss': 0.8881044387817383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.285, 'eval_steps_per_second': 11.285, 'epoch': 3.0}


100%|██████████| 9/9 [00:22<00:00,  2.52s/it]


{'train_runtime': 22.6942, 'train_samples_per_second': 3.173, 'train_steps_per_second': 0.397, 'train_loss': 1.318253517150879, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 181.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 24007.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 413.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.1994261741638184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.411, 'eval_steps_per_second': 11.411, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.21s/it]

{'eval_loss': 0.8728505969047546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.614, 'eval_steps_per_second': 11.614, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.27s/it]

{'eval_loss': 0.8881044387817383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.178, 'eval_steps_per_second': 12.178, 'epoch': 3.0}


100%|██████████| 9/9 [00:22<00:00,  2.46s/it]


{'train_runtime': 22.1032, 'train_samples_per_second': 3.257, 'train_steps_per_second': 0.407, 'train_loss': 1.318253517150879, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
100%|██████████| 1/1 [00:00<00:00, 133.05it/s]
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 11999.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 470.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.1994261741638184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.752, 'eval_steps_per_second': 11.752, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.29s/it]

{'eval_loss': 0.8728505969047546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.752, 'eval_steps_per_second': 11.752, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:21<00:00,  2.34s/it]

{'eval_loss': 0.8881044387817383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.819, 'eval_steps_per_second': 11.819, 'epoch': 3.0}


100%|██████████| 9/9 [00:22<00:00,  2.52s/it]


{'train_runtime': 22.7172, 'train_samples_per_second': 3.169, 'train_steps_per_second': 0.396, 'train_loss': 1.318253517150879, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 133.20it/s]
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 9583.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 399.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.1994261741638184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.893, 'eval_steps_per_second': 11.893, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.21s/it]

{'eval_loss': 0.8728505969047546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.677, 'eval_steps_per_second': 11.677, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.25s/it]

{'eval_loss': 0.8881044387817383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.286, 'eval_steps_per_second': 11.286, 'epoch': 3.0}


100%|██████████| 9/9 [00:22<00:00,  2.46s/it]


{'train_runtime': 22.1074, 'train_samples_per_second': 3.257, 'train_steps_per_second': 0.407, 'train_loss': 1.318253517150879, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.49it/s]
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.76it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 25013.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 405.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9366636276245117, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.957, 'eval_steps_per_second': 11.957, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.70s/it]

{'eval_loss': 0.863883912563324, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1356, 'eval_samples_per_second': 7.372, 'eval_steps_per_second': 7.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.72s/it]

{'eval_loss': 0.8659307360649109, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.682, 'eval_steps_per_second': 11.682, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.07s/it]


{'train_runtime': 24.8536, 'train_samples_per_second': 3.018, 'train_steps_per_second': 0.483, 'train_loss': 1.1634592215220134, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 117.46it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12526.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 349.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.955601692199707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.816, 'eval_steps_per_second': 11.816, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.68s/it]

{'eval_loss': 0.8325875997543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.482, 'eval_steps_per_second': 12.482, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.68s/it]

{'eval_loss': 0.8329160213470459, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.2326, 'eval_samples_per_second': 4.299, 'eval_steps_per_second': 4.299, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.07s/it]


{'train_runtime': 24.8043, 'train_samples_per_second': 3.024, 'train_steps_per_second': 0.484, 'train_loss': 1.1428711414337158, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.41it/s]
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 142.60it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12500.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 394.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.955601692199707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.8, 'eval_steps_per_second': 12.8, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.69s/it]

{'eval_loss': 0.8325875997543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.737, 'eval_steps_per_second': 10.737, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.71s/it]

{'eval_loss': 0.8329160213470459, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.097, 'eval_steps_per_second': 11.097, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.07s/it]


{'train_runtime': 24.8459, 'train_samples_per_second': 3.019, 'train_steps_per_second': 0.483, 'train_loss': 1.1428711414337158, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.35it/s]
100%|██████████| 1/1 [00:00<00:00, 166.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 16646.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 396.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.955601692199707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0916, 'eval_samples_per_second': 10.917, 'eval_steps_per_second': 10.917, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.63s/it]

{'eval_loss': 0.8325875997543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1438, 'eval_samples_per_second': 6.952, 'eval_steps_per_second': 6.952, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.72s/it]

{'eval_loss': 0.8329160213470459, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.348, 'eval_steps_per_second': 11.348, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.04s/it]


{'train_runtime': 24.4584, 'train_samples_per_second': 3.066, 'train_steps_per_second': 0.491, 'train_loss': 1.1428711414337158, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 16573.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 372.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.955601692199707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.794, 'eval_steps_per_second': 10.794, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.65s/it]

{'eval_loss': 0.8325875997543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.475, 'eval_steps_per_second': 11.475, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.70s/it]

{'eval_loss': 0.8329160213470459, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.855, 'eval_steps_per_second': 10.855, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.05s/it]


{'train_runtime': 24.6309, 'train_samples_per_second': 3.045, 'train_steps_per_second': 0.487, 'train_loss': 1.1428711414337158, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.04it/s]
100%|██████████| 1/1 [00:00<00:00, 142.58it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 17307.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.49 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 363.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9532447457313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.544, 'eval_steps_per_second': 11.544, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.72s/it]

{'eval_loss': 0.8371628522872925, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.77s/it]

{'eval_loss': 0.8392600417137146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.485, 'eval_steps_per_second': 12.485, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.09s/it]


{'train_runtime': 25.036, 'train_samples_per_second': 3.116, 'train_steps_per_second': 0.479, 'train_loss': 1.1390162309010823, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.25it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 153.76it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 25989.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 405.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9532447457313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.74s/it]

{'eval_loss': 0.8371628522872925, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.794, 'eval_steps_per_second': 10.794, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.77s/it]

{'eval_loss': 0.8392600417137146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0951, 'eval_samples_per_second': 10.51, 'eval_steps_per_second': 10.51, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.09s/it]


{'train_runtime': 25.1046, 'train_samples_per_second': 3.107, 'train_steps_per_second': 0.478, 'train_loss': 1.1390162309010823, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 25989.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 405.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 320.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9532447457313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.96, 'eval_steps_per_second': 11.96, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.71s/it]

{'eval_loss': 0.8371628522872925, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.888, 'eval_steps_per_second': 12.888, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.72s/it]

{'eval_loss': 0.8392600417137146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1236, 'eval_samples_per_second': 8.089, 'eval_steps_per_second': 8.089, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.07s/it]


{'train_runtime': 24.8356, 'train_samples_per_second': 3.141, 'train_steps_per_second': 0.483, 'train_loss': 1.1390162309010823, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
100%|██████████| 1/1 [00:00<00:00, 153.80it/s]
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 26001.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 399.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9532447457313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.624, 'eval_steps_per_second': 11.624, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.72s/it]

{'eval_loss': 0.8371628522872925, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0877, 'eval_samples_per_second': 11.398, 'eval_steps_per_second': 11.398, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.74s/it]

{'eval_loss': 0.8392600417137146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.285, 'eval_steps_per_second': 11.285, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.08s/it]


{'train_runtime': 24.982, 'train_samples_per_second': 3.122, 'train_steps_per_second': 0.48, 'train_loss': 1.1390162309010823, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 154.69it/s]
100%|██████████| 1/1 [00:00<00:00, 153.86it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12990.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.62 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 430.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9532447457313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.331, 'eval_steps_per_second': 12.331, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.73s/it]

{'eval_loss': 0.8371628522872925, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.797, 'eval_steps_per_second': 10.797, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.76s/it]

{'eval_loss': 0.8392600417137146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.095, 'eval_steps_per_second': 11.095, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.07s/it]


{'train_runtime': 24.8537, 'train_samples_per_second': 3.138, 'train_steps_per_second': 0.483, 'train_loss': 1.1390162309010823, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.86it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 27040.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 365.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 188.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9520198106765747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.85s/it]

{'eval_loss': 0.8354960083961487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.875, 'eval_steps_per_second': 11.875, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.87s/it]

{'eval_loss': 0.8342736959457397, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.738, 'eval_steps_per_second': 10.738, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.16s/it]


{'train_runtime': 25.9556, 'train_samples_per_second': 3.121, 'train_steps_per_second': 0.462, 'train_loss': 1.1419201691945393, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 153.48it/s]
100%|██████████| 1/1 [00:00<00:00, 153.66it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13470.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 361.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9520198106765747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.948, 'eval_steps_per_second': 11.948, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.80s/it]

{'eval_loss': 0.8354960083961487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.853, 'eval_steps_per_second': 10.853, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.85s/it]

{'eval_loss': 0.8342736959457397, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0923, 'eval_samples_per_second': 10.839, 'eval_steps_per_second': 10.839, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.14s/it]


{'train_runtime': 25.703, 'train_samples_per_second': 3.151, 'train_steps_per_second': 0.467, 'train_loss': 1.1419201691945393, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 155.77it/s]
100%|██████████| 1/1 [00:00<00:00, 166.44it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 27014.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 359.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9520198106765747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.683, 'eval_steps_per_second': 10.683, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.84s/it]

{'eval_loss': 0.8354960083961487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0927, 'eval_samples_per_second': 10.784, 'eval_steps_per_second': 10.784, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.88s/it]

{'eval_loss': 0.8342736959457397, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.685, 'eval_steps_per_second': 11.685, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.16s/it]


{'train_runtime': 25.969, 'train_samples_per_second': 3.119, 'train_steps_per_second': 0.462, 'train_loss': 1.1419201691945393, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.45it/s]
100%|██████████| 1/1 [00:00<00:00, 133.21it/s]
100%|██████████| 1/1 [00:00<00:00, 153.69it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 10774.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 390.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9520198106765747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.219, 'eval_steps_per_second': 11.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.79s/it]

{'eval_loss': 0.8354960083961487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1387, 'eval_samples_per_second': 7.21, 'eval_steps_per_second': 7.21, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.82s/it]

{'eval_loss': 0.8342736959457397, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1323, 'eval_samples_per_second': 7.56, 'eval_steps_per_second': 7.56, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.12s/it]


{'train_runtime': 25.4757, 'train_samples_per_second': 3.18, 'train_steps_per_second': 0.471, 'train_loss': 1.1419201691945393, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 27008.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1992.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 405.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.9520198106765747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.886, 'eval_steps_per_second': 11.886, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.84s/it]

{'eval_loss': 0.8354960083961487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.683, 'eval_steps_per_second': 11.683, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.87s/it]

{'eval_loss': 0.8342736959457397, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1302, 'eval_samples_per_second': 7.682, 'eval_steps_per_second': 7.682, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.15s/it]


{'train_runtime': 25.801, 'train_samples_per_second': 3.139, 'train_steps_per_second': 0.465, 'train_loss': 1.1419201691945393, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 28055.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 375.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9527608752250671, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.908, 'eval_steps_per_second': 11.908, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.87s/it]

{'eval_loss': 0.8343704342842102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.095, 'eval_steps_per_second': 11.095, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.95s/it]

{'eval_loss': 0.8353789448738098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.548, 'eval_steps_per_second': 11.548, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.20s/it]


{'train_runtime': 26.4493, 'train_samples_per_second': 3.176, 'train_steps_per_second': 0.454, 'train_loss': 1.1360831260681152, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 125.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.47it/s]
100%|██████████| 1/1 [00:00<00:00, 133.33it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 14001.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.71 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 391.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9527608752250671, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.89s/it]

{'eval_loss': 0.8343704342842102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.098, 'eval_steps_per_second': 11.098, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.01s/it]

{'eval_loss': 0.8353789448738098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.217, 'eval_steps_per_second': 11.217, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.26s/it]


{'train_runtime': 27.0774, 'train_samples_per_second': 3.102, 'train_steps_per_second': 0.443, 'train_loss': 1.1360831260681152, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 124.75it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 27948.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 992.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.50 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 360.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9527608752250671, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.479, 'eval_steps_per_second': 11.479, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.90s/it]

{'eval_loss': 0.8343704342842102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1047, 'eval_samples_per_second': 9.554, 'eval_steps_per_second': 9.554, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.90s/it]

{'eval_loss': 0.8353789448738098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.643, 'eval_steps_per_second': 12.643, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.18s/it]


{'train_runtime': 26.12, 'train_samples_per_second': 3.216, 'train_steps_per_second': 0.459, 'train_loss': 1.1360831260681152, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.10it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13999.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 358.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9527608752250671, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1266, 'eval_samples_per_second': 7.896, 'eval_steps_per_second': 7.896, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.90s/it]

{'eval_loss': 0.8343704342842102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0922, 'eval_samples_per_second': 10.85, 'eval_steps_per_second': 10.85, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.93s/it]

{'eval_loss': 0.8353789448738098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1048, 'eval_samples_per_second': 9.542, 'eval_steps_per_second': 9.542, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.22s/it]


{'train_runtime': 26.5788, 'train_samples_per_second': 3.16, 'train_steps_per_second': 0.451, 'train_loss': 1.1360831260681152, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.06it/s]
100%|██████████| 1/1 [00:00<00:00, 153.76it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 18632.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 371.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9527608752250671, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1017, 'eval_samples_per_second': 9.834, 'eval_steps_per_second': 9.834, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.99s/it]

{'eval_loss': 0.8343704342842102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.91s/it]

{'eval_loss': 0.8353789448738098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.853, 'eval_steps_per_second': 10.853, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.24s/it]


{'train_runtime': 26.8404, 'train_samples_per_second': 3.13, 'train_steps_per_second': 0.447, 'train_loss': 1.1360831260681152, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.44it/s]
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.77it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 28995.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 365.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.952914297580719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.887, 'eval_steps_per_second': 11.887, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.97s/it]

{'eval_loss': 0.8343604207038879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.956, 'eval_steps_per_second': 11.956, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.04s/it]

{'eval_loss': 0.8350937366485596, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.956, 'eval_steps_per_second': 11.956, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.27s/it]


{'train_runtime': 27.213, 'train_samples_per_second': 3.197, 'train_steps_per_second': 0.441, 'train_loss': 1.1363881429036458, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 181.47it/s]
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14501.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 323.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.952914297580719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.098, 'eval_steps_per_second': 11.098, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.92s/it]

{'eval_loss': 0.8343604207038879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.01s/it]

{'eval_loss': 0.8350937366485596, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1071, 'eval_samples_per_second': 9.333, 'eval_steps_per_second': 9.333, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.24s/it]


{'train_runtime': 26.8396, 'train_samples_per_second': 3.241, 'train_steps_per_second': 0.447, 'train_loss': 1.1363881429036458, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.75it/s]
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14506.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 344.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.952914297580719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.747, 'eval_steps_per_second': 11.747, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.94s/it]

{'eval_loss': 0.8343604207038879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.818, 'eval_steps_per_second': 11.818, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.94s/it]

{'eval_loss': 0.8350937366485596, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.719, 'eval_steps_per_second': 12.719, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.25s/it]


{'train_runtime': 26.9718, 'train_samples_per_second': 3.226, 'train_steps_per_second': 0.445, 'train_loss': 1.1363881429036458, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14490.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 662.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 355.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.952914297580719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.751, 'eval_steps_per_second': 11.751, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.94s/it]

{'eval_loss': 0.8343604207038879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.736, 'eval_steps_per_second': 10.736, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.93s/it]

{'eval_loss': 0.8350937366485596, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.889, 'eval_steps_per_second': 11.889, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.18s/it]


{'train_runtime': 26.2124, 'train_samples_per_second': 3.319, 'train_steps_per_second': 0.458, 'train_loss': 1.1363881429036458, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14483.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 344.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.952914297580719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0923, 'eval_samples_per_second': 10.838, 'eval_steps_per_second': 10.838, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.92s/it]

{'eval_loss': 0.8343604207038879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.815, 'eval_steps_per_second': 11.815, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.94s/it]

{'eval_loss': 0.8350937366485596, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.4, 'eval_steps_per_second': 12.4, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.20s/it]


{'train_runtime': 26.4573, 'train_samples_per_second': 3.288, 'train_steps_per_second': 0.454, 'train_loss': 1.1363881429036458, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 181.26it/s]
100%|██████████| 1/1 [00:00<00:00, 166.41it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 29937.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 349.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 238.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9509984850883484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.102, 'eval_steps_per_second': 12.102, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.98s/it]

{'eval_loss': 0.8321443200111389, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.478, 'eval_steps_per_second': 11.478, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.99s/it]

{'eval_loss': 0.8331694602966309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.178, 'eval_steps_per_second': 12.178, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.21s/it]


{'train_runtime': 26.5086, 'train_samples_per_second': 3.395, 'train_steps_per_second': 0.453, 'train_loss': 1.136731465657552, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 153.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14974.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 320.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9509984850883484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.815, 'eval_steps_per_second': 11.815, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.95s/it]

{'eval_loss': 0.8321443200111389, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1042, 'eval_samples_per_second': 9.599, 'eval_steps_per_second': 9.599, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.00s/it]

{'eval_loss': 0.8331694602966309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.218, 'eval_steps_per_second': 11.218, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.23s/it]


{'train_runtime': 26.7343, 'train_samples_per_second': 3.366, 'train_steps_per_second': 0.449, 'train_loss': 1.136731465657552, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 142.34it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 19941.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 662.92 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 343.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 283.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9509984850883484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0772, 'eval_samples_per_second': 12.95, 'eval_steps_per_second': 12.95, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.96s/it]

{'eval_loss': 0.8321443200111389, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.56, 'eval_steps_per_second': 12.56, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.01s/it]

{'eval_loss': 0.8331694602966309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.101, 'eval_steps_per_second': 11.101, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.23s/it]


{'train_runtime': 26.7676, 'train_samples_per_second': 3.362, 'train_steps_per_second': 0.448, 'train_loss': 1.136731465657552, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 142.37it/s]
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14999.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 367.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.9509984850883484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.41, 'eval_steps_per_second': 11.41, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.97s/it]

{'eval_loss': 0.8321443200111389, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.484, 'eval_steps_per_second': 12.484, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.06s/it]

{'eval_loss': 0.8331694602966309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.544, 'eval_steps_per_second': 11.544, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.25s/it]


{'train_runtime': 26.9848, 'train_samples_per_second': 3.335, 'train_steps_per_second': 0.445, 'train_loss': 1.136731465657552, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
100%|██████████| 1/1 [00:00<00:00, 181.56it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14997.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 352.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9509984850883484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.029, 'eval_steps_per_second': 12.029, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.97s/it]

{'eval_loss': 0.8321443200111389, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.722, 'eval_steps_per_second': 12.722, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.97s/it]

{'eval_loss': 0.8331694602966309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.22s/it]


{'train_runtime': 26.6095, 'train_samples_per_second': 3.382, 'train_steps_per_second': 0.451, 'train_loss': 1.136731465657552, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 142.93it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 30965.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 364.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9532712697982788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.218, 'eval_steps_per_second': 11.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.05s/it]

{'eval_loss': 0.8330405950546265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.948, 'eval_steps_per_second': 11.948, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.09s/it]

{'eval_loss': 0.8329138159751892, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.178, 'eval_steps_per_second': 12.178, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.28s/it]


{'train_runtime': 27.3835, 'train_samples_per_second': 3.396, 'train_steps_per_second': 0.438, 'train_loss': 1.1350910663604736, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15478.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 309.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9532712697982788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.174, 'eval_steps_per_second': 12.174, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.01s/it]

{'eval_loss': 0.8330405950546265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.09, 'eval_steps_per_second': 12.09, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.07s/it]

{'eval_loss': 0.8329138159751892, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.26s/it]


{'train_runtime': 27.1443, 'train_samples_per_second': 3.426, 'train_steps_per_second': 0.442, 'train_loss': 1.1350910663604736, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.55it/s]
100%|██████████| 1/1 [00:00<00:00, 166.35it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15469.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 299.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9532712697982788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.886, 'eval_steps_per_second': 11.886, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.11s/it]

{'eval_loss': 0.8330405950546265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.854, 'eval_steps_per_second': 10.854, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.09s/it]

{'eval_loss': 0.8329138159751892, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.30s/it]


{'train_runtime': 27.5619, 'train_samples_per_second': 3.374, 'train_steps_per_second': 0.435, 'train_loss': 1.1350910663604736, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
100%|██████████| 1/1 [00:00<00:00, 142.79it/s]
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15514.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 312.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9532712697982788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.346, 'eval_steps_per_second': 11.346, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:17<00:08,  2.08s/it]

{'eval_loss': 0.8330405950546265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.33, 'eval_steps_per_second': 12.33, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.13s/it]

{'eval_loss': 0.8329138159751892, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.161, 'eval_steps_per_second': 11.161, 'epoch': 3.0}


100%|██████████| 12/12 [00:28<00:00,  2.34s/it]


{'train_runtime': 28.0874, 'train_samples_per_second': 3.311, 'train_steps_per_second': 0.427, 'train_loss': 1.1350910663604736, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.52it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15502.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 334.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9532712697982788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0797, 'eval_samples_per_second': 12.547, 'eval_steps_per_second': 12.547, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.03s/it]

{'eval_loss': 0.8330405950546265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.543, 'eval_steps_per_second': 11.543, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.04s/it]

{'eval_loss': 0.8329138159751892, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.802, 'eval_steps_per_second': 11.802, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.26s/it]


{'train_runtime': 27.1261, 'train_samples_per_second': 3.428, 'train_steps_per_second': 0.442, 'train_loss': 1.1350910663604736, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.98it/s]
100%|██████████| 1/1 [00:00<00:00, 153.76it/s]
100%|██████████| 1/1 [00:00<00:00, 153.48it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 16010.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.55 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 316.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9541611075401306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.06s/it]

{'eval_loss': 0.8337958455085754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.816, 'eval_steps_per_second': 11.816, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.11s/it]

{'eval_loss': 0.8333702087402344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.028, 'eval_steps_per_second': 12.028, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.29s/it]


{'train_runtime': 27.5222, 'train_samples_per_second': 3.488, 'train_steps_per_second': 0.436, 'train_loss': 1.1348661581675212, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
100%|██████████| 1/1 [00:00<00:00, 142.66it/s]
100%|██████████| 1/1 [00:00<00:00, 133.22it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15985.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 305.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9541611075401306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.612, 'eval_steps_per_second': 11.612, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:17<00:08,  2.11s/it]

{'eval_loss': 0.8337958455085754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0927, 'eval_samples_per_second': 10.793, 'eval_steps_per_second': 10.793, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.11s/it]

{'eval_loss': 0.8333702087402344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.31s/it]


{'train_runtime': 27.7679, 'train_samples_per_second': 3.457, 'train_steps_per_second': 0.432, 'train_loss': 1.1348661581675212, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 142.62it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 16003.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 324.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9541611075401306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1287, 'eval_samples_per_second': 7.768, 'eval_steps_per_second': 7.768, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.07s/it]

{'eval_loss': 0.8337958455085754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.384, 'eval_steps_per_second': 12.384, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.10s/it]

{'eval_loss': 0.8333702087402344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.351, 'eval_steps_per_second': 12.351, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.29s/it]


{'train_runtime': 27.4383, 'train_samples_per_second': 3.499, 'train_steps_per_second': 0.437, 'train_loss': 1.1348661581675212, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 31850.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 324.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9541611075401306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.475, 'eval_steps_per_second': 11.475, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.11s/it]

{'eval_loss': 0.8337958455085754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.682, 'eval_steps_per_second': 11.682, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:26<00:00,  2.11s/it]

{'eval_loss': 0.8333702087402344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.612, 'eval_steps_per_second': 11.612, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.28s/it]


{'train_runtime': 27.3835, 'train_samples_per_second': 3.506, 'train_steps_per_second': 0.438, 'train_loss': 1.1348661581675212, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 31865.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1991.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 327.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9541611075401306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 11.611, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.08s/it]

{'eval_loss': 0.8337958455085754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.956, 'eval_steps_per_second': 11.956, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.11s/it]

{'eval_loss': 0.8333702087402344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.178, 'eval_steps_per_second': 12.178, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.28s/it]


{'train_runtime': 27.3357, 'train_samples_per_second': 3.512, 'train_steps_per_second': 0.439, 'train_loss': 1.1348661581675212, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.41it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 21942.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 343.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8462595343589783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0797, 'eval_samples_per_second': 12.541, 'eval_steps_per_second': 12.541, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:07,  1.59s/it]

{'eval_loss': 0.8069952130317688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.476, 'eval_steps_per_second': 11.476, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.62s/it]

{'eval_loss': 0.7855638861656189, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.857, 'eval_steps_per_second': 10.857, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.95s/it]


{'train_runtime': 29.264, 'train_samples_per_second': 3.383, 'train_steps_per_second': 0.513, 'train_loss': 1.025915273030599, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 181.40it/s]
100%|██████████| 1/1 [00:00<00:00, 153.69it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 32963.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 284.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7561336755752563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0902, 'eval_samples_per_second': 11.083, 'eval_steps_per_second': 11.083, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:07,  1.60s/it]

{'eval_loss': 0.8013245463371277, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.676, 'eval_steps_per_second': 11.676, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.61s/it]

{'eval_loss': 0.7740396857261658, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.885, 'eval_steps_per_second': 11.885, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.97s/it]


{'train_runtime': 29.5319, 'train_samples_per_second': 3.352, 'train_steps_per_second': 0.508, 'train_loss': 1.0744188944498698, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 95.12it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16493.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 347.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 485.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.7561336755752563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.033, 'eval_steps_per_second': 12.033, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.56s/it]

{'eval_loss': 0.8013245463371277, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.62s/it]

{'eval_loss': 0.7740396857261658, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.815, 'eval_steps_per_second': 11.815, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


{'train_runtime': 29.1721, 'train_samples_per_second': 3.394, 'train_steps_per_second': 0.514, 'train_loss': 1.0744188944498698, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.40it/s]
100%|██████████| 1/1 [00:00<00:00, 153.61it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 32994.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 309.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7561336755752563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.947, 'eval_steps_per_second': 11.947, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.57s/it]

{'eval_loss': 0.8013245463371277, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.957, 'eval_steps_per_second': 11.957, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.60s/it]

{'eval_loss': 0.7740396857261658, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.683, 'eval_steps_per_second': 11.683, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


{'train_runtime': 29.1467, 'train_samples_per_second': 3.397, 'train_steps_per_second': 0.515, 'train_loss': 1.0744188944498698, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.46it/s]
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16477.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1992.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 310.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7561336755752563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.956, 'eval_steps_per_second': 11.956, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.57s/it]

{'eval_loss': 0.8013245463371277, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.103, 'eval_steps_per_second': 12.103, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.60s/it]

{'eval_loss': 0.7740396857261658, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.878, 'eval_steps_per_second': 11.878, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


{'train_runtime': 29.0364, 'train_samples_per_second': 3.41, 'train_steps_per_second': 0.517, 'train_loss': 1.0744188944498698, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.52it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 13539.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.31 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 291.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7578747272491455, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.309, 'eval_steps_per_second': 11.309, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.796183168888092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.906, 'eval_steps_per_second': 11.906, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.65s/it]

{'eval_loss': 0.7666940093040466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0872, 'eval_samples_per_second': 11.474, 'eval_steps_per_second': 11.474, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.97s/it]


{'train_runtime': 29.5914, 'train_samples_per_second': 3.447, 'train_steps_per_second': 0.507, 'train_loss': 1.067858378092448, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16991.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 992.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 300.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7578747272491455, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.286, 'eval_steps_per_second': 11.286, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.65s/it]

{'eval_loss': 0.796183168888092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.795, 'eval_steps_per_second': 10.795, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.72s/it]

{'eval_loss': 0.7666940093040466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1057, 'eval_samples_per_second': 9.463, 'eval_steps_per_second': 9.463, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.01s/it]


{'train_runtime': 30.1458, 'train_samples_per_second': 3.384, 'train_steps_per_second': 0.498, 'train_loss': 1.067858378092448, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 199.93it/s]
100%|██████████| 1/1 [00:00<00:00, 133.00it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 13562.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.15 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 292.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7578747272491455, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.82, 'eval_steps_per_second': 11.82, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.66s/it]

{'eval_loss': 0.796183168888092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.814, 'eval_steps_per_second': 11.814, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.68s/it]

{'eval_loss': 0.7666940093040466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.035, 'eval_steps_per_second': 11.035, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.99s/it]


{'train_runtime': 29.877, 'train_samples_per_second': 3.414, 'train_steps_per_second': 0.502, 'train_loss': 1.067858378092448, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16956.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 296.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7578747272491455, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.557, 'eval_steps_per_second': 12.557, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.65s/it]

{'eval_loss': 0.796183168888092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.886, 'eval_steps_per_second': 11.886, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.69s/it]

{'eval_loss': 0.7666940093040466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.345, 'eval_steps_per_second': 11.345, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.00s/it]


{'train_runtime': 30.0102, 'train_samples_per_second': 3.399, 'train_steps_per_second': 0.5, 'train_loss': 1.067858378092448, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.28it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 153.43it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 17005.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1985.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 291.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 293.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7578747272491455, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.887, 'eval_steps_per_second': 11.887, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.66s/it]

{'eval_loss': 0.796183168888092, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.883, 'eval_steps_per_second': 11.883, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.64s/it]

{'eval_loss': 0.7666940093040466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.717, 'eval_steps_per_second': 12.717, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.97s/it]


{'train_runtime': 29.5689, 'train_samples_per_second': 3.45, 'train_steps_per_second': 0.507, 'train_loss': 1.067858378092448, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
100%|██████████| 1/1 [00:00<00:00, 181.60it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17472.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 304.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 304.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7581020593643188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.745, 'eval_steps_per_second': 11.745, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.72s/it]

{'eval_loss': 0.7973778247833252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1268, 'eval_samples_per_second': 7.884, 'eval_steps_per_second': 7.884, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.71s/it]

{'eval_loss': 0.7671562433242798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1312, 'eval_samples_per_second': 7.621, 'eval_steps_per_second': 7.621, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.02s/it]


{'train_runtime': 30.3632, 'train_samples_per_second': 3.458, 'train_steps_per_second': 0.494, 'train_loss': 1.0652095794677734, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.45it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 296.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7581020593643188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1132, 'eval_samples_per_second': 8.835, 'eval_steps_per_second': 8.835, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.68s/it]

{'eval_loss': 0.7973778247833252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.809, 'eval_steps_per_second': 11.809, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.72s/it]

{'eval_loss': 0.7671562433242798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1147, 'eval_samples_per_second': 8.719, 'eval_steps_per_second': 8.719, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.02s/it]


{'train_runtime': 30.2666, 'train_samples_per_second': 3.469, 'train_steps_per_second': 0.496, 'train_loss': 1.0652095794677734, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.66it/s]
100%|██████████| 1/1 [00:00<00:00, 148.17it/s]
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17507.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 290.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 261.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7581020593643188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1362, 'eval_samples_per_second': 7.34, 'eval_steps_per_second': 7.34, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.71s/it]

{'eval_loss': 0.7973778247833252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.642, 'eval_steps_per_second': 12.642, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.71s/it]

{'eval_loss': 0.7671562433242798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1232, 'eval_samples_per_second': 8.118, 'eval_steps_per_second': 8.118, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.02s/it]


{'train_runtime': 30.2844, 'train_samples_per_second': 3.467, 'train_steps_per_second': 0.495, 'train_loss': 1.0652095794677734, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.45it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17497.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 278.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7581020593643188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1112, 'eval_samples_per_second': 8.996, 'eval_steps_per_second': 8.996, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.70s/it]

{'eval_loss': 0.7973778247833252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1097, 'eval_samples_per_second': 9.118, 'eval_steps_per_second': 9.118, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.75s/it]

{'eval_loss': 0.7671562433242798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1187, 'eval_samples_per_second': 8.425, 'eval_steps_per_second': 8.425, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.03s/it]


{'train_runtime': 30.501, 'train_samples_per_second': 3.443, 'train_steps_per_second': 0.492, 'train_loss': 1.0652095794677734, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 163.34it/s]
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 34762.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 312.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7581020593643188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.546, 'eval_steps_per_second': 11.546, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.74s/it]

{'eval_loss': 0.7973778247833252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.541, 'eval_steps_per_second': 11.541, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.74s/it]

{'eval_loss': 0.7671562433242798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1267, 'eval_samples_per_second': 7.891, 'eval_steps_per_second': 7.891, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.04s/it]


{'train_runtime': 30.6673, 'train_samples_per_second': 3.424, 'train_steps_per_second': 0.489, 'train_loss': 1.0652095794677734, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.05it/s]
100%|██████████| 1/1 [00:00<00:00, 153.82it/s]
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17988.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 297.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.758878231048584, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.677, 'eval_steps_per_second': 11.677, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.76s/it]

{'eval_loss': 0.7957702875137329, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.77s/it]

{'eval_loss': 0.7648932933807373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0771, 'eval_samples_per_second': 12.969, 'eval_steps_per_second': 12.969, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


{'train_runtime': 30.7444, 'train_samples_per_second': 3.513, 'train_steps_per_second': 0.488, 'train_loss': 1.064437166849772, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.76it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17986.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 256.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.758878231048584, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.095, 'eval_samples_per_second': 10.524, 'eval_steps_per_second': 10.524, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.81s/it]

{'eval_loss': 0.7957702875137329, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 11.611, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.77s/it]

{'eval_loss': 0.7648932933807373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.2558, 'train_samples_per_second': 3.455, 'train_steps_per_second': 0.48, 'train_loss': 1.064437166849772, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 18009.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 286.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.758878231048584, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.715, 'eval_steps_per_second': 12.715, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.78s/it]

{'eval_loss': 0.7957702875137329, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.479, 'eval_steps_per_second': 11.479, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.78s/it]

{'eval_loss': 0.7648932933807373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.893, 'eval_steps_per_second': 11.893, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.1359, 'train_samples_per_second': 3.469, 'train_steps_per_second': 0.482, 'train_loss': 1.064437166849772, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.05it/s]
100%|██████████| 1/1 [00:00<00:00, 181.64it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17960.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 264.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.758878231048584, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.85s/it]

{'eval_loss': 0.7957702875137329, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.823, 'eval_steps_per_second': 11.823, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.80s/it]

{'eval_loss': 0.7648932933807373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.034, 'eval_steps_per_second': 12.034, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.2648, 'train_samples_per_second': 3.454, 'train_steps_per_second': 0.48, 'train_loss': 1.064437166849772, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
100%|██████████| 1/1 [00:00<00:00, 166.55it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17982.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 281.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.758878231048584, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.76s/it]

{'eval_loss': 0.7957702875137329, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.751, 'eval_steps_per_second': 11.751, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.77s/it]

{'eval_loss': 0.7648932933807373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.805, 'eval_steps_per_second': 12.805, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.0644, 'train_samples_per_second': 3.477, 'train_steps_per_second': 0.483, 'train_loss': 1.064437166849772, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.76it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
100%|██████████| 1/1 [00:00<00:00, 142.79it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 14763.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.62 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 280.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7584471702575684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.7950179576873779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.957, 'eval_steps_per_second': 11.957, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.86s/it]

{'eval_loss': 0.7637616395950317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.892, 'eval_steps_per_second': 11.892, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.5633, 'train_samples_per_second': 3.517, 'train_steps_per_second': 0.475, 'train_loss': 1.0642712275187174, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 133.29it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18501.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 286.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7584471702575684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.678, 'eval_steps_per_second': 11.678, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.7950179576873779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.08, 'eval_samples_per_second': 12.494, 'eval_steps_per_second': 12.494, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.91s/it]

{'eval_loss': 0.7637616395950317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.614, 'eval_steps_per_second': 11.614, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.12s/it]


{'train_runtime': 31.763, 'train_samples_per_second': 3.495, 'train_steps_per_second': 0.472, 'train_loss': 1.0642712275187174, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
100%|██████████| 1/1 [00:00<00:00, 181.51it/s]
100%|██████████| 1/1 [00:00<00:00, 142.70it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18457.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 274.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7584471702575684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.887, 'eval_steps_per_second': 11.887, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.88s/it]

{'eval_loss': 0.7950179576873779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.873, 'eval_steps_per_second': 11.873, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.86s/it]

{'eval_loss': 0.7637616395950317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.41, 'eval_steps_per_second': 11.41, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.12s/it]


{'train_runtime': 31.7602, 'train_samples_per_second': 3.495, 'train_steps_per_second': 0.472, 'train_loss': 1.0642712275187174, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 24625.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 264.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 311.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 283.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7584471702575684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.609, 'eval_steps_per_second': 11.609, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.85s/it]

{'eval_loss': 0.7950179576873779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0971, 'eval_samples_per_second': 10.295, 'eval_steps_per_second': 10.295, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.86s/it]

{'eval_loss': 0.7637616395950317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.033, 'eval_steps_per_second': 12.033, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.11s/it]


{'train_runtime': 31.6758, 'train_samples_per_second': 3.504, 'train_steps_per_second': 0.474, 'train_loss': 1.0642712275187174, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 181.39it/s]
100%|██████████| 1/1 [00:00<00:00, 166.43it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 24582.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 283.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 320.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7584471702575684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.88s/it]

{'eval_loss': 0.7950179576873779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.547, 'eval_steps_per_second': 11.547, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.88s/it]

{'eval_loss': 0.7637616395950317, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1071, 'eval_samples_per_second': 9.334, 'eval_steps_per_second': 9.334, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.13s/it]


{'train_runtime': 32.0123, 'train_samples_per_second': 3.467, 'train_steps_per_second': 0.469, 'train_loss': 1.0642712275187174, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.42it/s]
100%|██████████| 1/1 [00:00<00:00, 142.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.78it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18981.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 276.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7583792805671692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.481, 'eval_steps_per_second': 12.481, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.90s/it]

{'eval_loss': 0.7925807237625122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.036, 'eval_steps_per_second': 12.036, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.94s/it]

{'eval_loss': 0.7606817483901978, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.15s/it]


{'train_runtime': 32.2699, 'train_samples_per_second': 3.533, 'train_steps_per_second': 0.465, 'train_loss': 1.0639575958251952, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 153.70it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18996.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 399.27 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 270.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 240.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7583792805671692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.682, 'eval_steps_per_second': 10.682, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.94s/it]

{'eval_loss': 0.7925807237625122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.546, 'eval_steps_per_second': 11.546, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.94s/it]

{'eval_loss': 0.7606817483901978, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.965, 'eval_steps_per_second': 11.965, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.17s/it]


{'train_runtime': 32.5785, 'train_samples_per_second': 3.499, 'train_steps_per_second': 0.46, 'train_loss': 1.0639575958251952, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 19001.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 292.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7583792805671692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.482, 'eval_steps_per_second': 11.482, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.91s/it]

{'eval_loss': 0.7925807237625122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.801, 'eval_steps_per_second': 12.801, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.90s/it]

{'eval_loss': 0.7606817483901978, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1152, 'eval_samples_per_second': 8.683, 'eval_steps_per_second': 8.683, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.14s/it]


{'train_runtime': 32.0354, 'train_samples_per_second': 3.559, 'train_steps_per_second': 0.468, 'train_loss': 1.0639575958251952, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.79it/s]
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 19014.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 289.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7583792805671692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0787, 'eval_samples_per_second': 12.709, 'eval_steps_per_second': 12.709, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.94s/it]

{'eval_loss': 0.7925807237625122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.798, 'eval_steps_per_second': 12.798, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.95s/it]

{'eval_loss': 0.7606817483901978, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0782, 'eval_samples_per_second': 12.782, 'eval_steps_per_second': 12.782, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.16s/it]


{'train_runtime': 32.4511, 'train_samples_per_second': 3.513, 'train_steps_per_second': 0.462, 'train_loss': 1.0639575958251952, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.28it/s]
100%|██████████| 1/1 [00:00<00:00, 153.52it/s]
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 19001.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 272.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7583792805671692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1066, 'eval_samples_per_second': 9.379, 'eval_steps_per_second': 9.379, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.91s/it]

{'eval_loss': 0.7925807237625122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.719, 'eval_steps_per_second': 12.719, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.94s/it]

{'eval_loss': 0.7606817483901978, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.25, 'eval_steps_per_second': 12.25, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.16s/it]


{'train_runtime': 32.3684, 'train_samples_per_second': 3.522, 'train_steps_per_second': 0.463, 'train_loss': 1.0639575958251952, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 161.22it/s]
100%|██████████| 1/1 [00:00<00:00, 166.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19448.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 300.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7581155896186829, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.887, 'eval_steps_per_second': 11.887, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.98s/it]

{'eval_loss': 0.7928810715675354, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.386, 'eval_steps_per_second': 12.386, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.01s/it]

{'eval_loss': 0.7614564895629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.481, 'eval_steps_per_second': 11.481, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.19s/it]


{'train_runtime': 32.8331, 'train_samples_per_second': 3.563, 'train_steps_per_second': 0.457, 'train_loss': 1.0639166514078775, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 38993.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 272.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7581155896186829, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.97s/it]

{'eval_loss': 0.7928810715675354, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.822, 'eval_steps_per_second': 11.822, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.99s/it]

{'eval_loss': 0.7614564895629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.64, 'eval_steps_per_second': 12.64, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.19s/it]


{'train_runtime': 32.8332, 'train_samples_per_second': 3.563, 'train_steps_per_second': 0.457, 'train_loss': 1.0639166514078775, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.56it/s]
100%|██████████| 1/1 [00:00<00:00, 142.60it/s]
100%|██████████| 1/1 [00:00<00:00, 140.65it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19510.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 276.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7581155896186829, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.355, 'eval_steps_per_second': 11.355, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:10,  2.00s/it]

{'eval_loss': 0.7928810715675354, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.35, 'eval_steps_per_second': 11.35, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.02s/it]

{'eval_loss': 0.7614564895629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.20s/it]


{'train_runtime': 33.0659, 'train_samples_per_second': 3.538, 'train_steps_per_second': 0.454, 'train_loss': 1.0639166514078775, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
100%|██████████| 1/1 [00:00<00:00, 133.22it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19494.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 279.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7581155896186829, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.218, 'eval_steps_per_second': 11.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.97s/it]

{'eval_loss': 0.7928810715675354, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.676, 'eval_steps_per_second': 12.676, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.98s/it]

{'eval_loss': 0.7614564895629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.679, 'eval_steps_per_second': 11.679, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.18s/it]


{'train_runtime': 32.7288, 'train_samples_per_second': 3.575, 'train_steps_per_second': 0.458, 'train_loss': 1.0639166514078775, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
100%|██████████| 1/1 [00:00<00:00, 181.36it/s]
100%|██████████| 1/1 [00:00<00:00, 142.79it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19475.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 284.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7581155896186829, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.098, 'eval_steps_per_second': 11.098, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.97s/it]

{'eval_loss': 0.7928810715675354, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.05s/it]

{'eval_loss': 0.7614564895629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.483, 'eval_steps_per_second': 12.483, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.21s/it]


{'train_runtime': 33.2207, 'train_samples_per_second': 3.522, 'train_steps_per_second': 0.452, 'train_loss': 1.0639166514078775, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.58it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19987.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 278.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7577984929084778, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.643, 'eval_steps_per_second': 12.643, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:10,  2.06s/it]

{'eval_loss': 0.793655276298523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.61, 'eval_steps_per_second': 11.61, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.04s/it]

{'eval_loss': 0.7627779841423035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.181, 'eval_steps_per_second': 12.181, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.22s/it]


{'train_runtime': 33.2752, 'train_samples_per_second': 3.606, 'train_steps_per_second': 0.451, 'train_loss': 1.0642225901285807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 86.89it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19951.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 666.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 273.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.7577984929084778, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:21<00:10,  2.06s/it]

{'eval_loss': 0.793655276298523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.547, 'eval_steps_per_second': 11.547, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:32<00:00,  2.09s/it]

{'eval_loss': 0.7627779841423035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.96, 'eval_steps_per_second': 11.96, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.25s/it]


{'train_runtime': 33.7424, 'train_samples_per_second': 3.556, 'train_steps_per_second': 0.445, 'train_loss': 1.0642225901285807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.58it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.24it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19951.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 902.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 289.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7577984929084778, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:10,  2.04s/it]

{'eval_loss': 0.793655276298523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.036, 'eval_steps_per_second': 11.036, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.05s/it]

{'eval_loss': 0.7627779841423035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.415, 'eval_steps_per_second': 11.415, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.21s/it]


{'train_runtime': 33.2011, 'train_samples_per_second': 3.614, 'train_steps_per_second': 0.452, 'train_loss': 1.0642225901285807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
100%|██████████| 1/1 [00:00<00:00, 133.01it/s]
100%|██████████| 1/1 [00:00<00:00, 105.05it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19999.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 279.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 288.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7577984929084778, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.703, 'eval_steps_per_second': 11.703, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:10,  2.05s/it]

{'eval_loss': 0.793655276298523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.096, 'eval_steps_per_second': 11.096, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:32<00:00,  2.14s/it]

{'eval_loss': 0.7627779841423035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.544, 'eval_steps_per_second': 11.544, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.26s/it]


{'train_runtime': 33.9181, 'train_samples_per_second': 3.538, 'train_steps_per_second': 0.442, 'train_loss': 1.0642225901285807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
100%|██████████| 1/1 [00:00<00:00, 153.69it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19972.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 300.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7577984929084778, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.549, 'eval_steps_per_second': 11.549, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:10,  2.03s/it]

{'eval_loss': 0.793655276298523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.674, 'eval_steps_per_second': 11.674, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.07s/it]

{'eval_loss': 0.7627779841423035, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.958, 'eval_steps_per_second': 11.958, 'epoch': 3.0}


100%|██████████| 15/15 [00:33<00:00,  2.21s/it]


{'train_runtime': 33.2093, 'train_samples_per_second': 3.613, 'train_steps_per_second': 0.452, 'train_loss': 1.0642225901285807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 16365.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 259.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7156471610069275, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.561, 'eval_steps_per_second': 12.561, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.64s/it]

{'eval_loss': 0.7588290572166443, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1491, 'eval_samples_per_second': 6.705, 'eval_steps_per_second': 6.705, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.62s/it]

{'eval_loss': 0.7173526287078857, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1022, 'eval_samples_per_second': 9.784, 'eval_steps_per_second': 9.784, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  2.00s/it]


{'train_runtime': 35.9768, 'train_samples_per_second': 3.419, 'train_steps_per_second': 0.5, 'train_loss': 0.9750091764662001, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
100%|██████████| 1/1 [00:00<00:00, 73.96it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 41002.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 271.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 237.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.658993124961853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.174, 'eval_steps_per_second': 12.174, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.68s/it]

{'eval_loss': 0.6183078289031982, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.217, 'eval_steps_per_second': 11.217, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.66s/it]

{'eval_loss': 0.6169606447219849, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.349, 'eval_steps_per_second': 11.349, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.014, 'train_samples_per_second': 3.323, 'train_steps_per_second': 0.486, 'train_loss': 1.137582778930664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 40993.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 280.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.658993124961853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0902, 'eval_samples_per_second': 11.092, 'eval_steps_per_second': 11.092, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:09,  1.66s/it]

{'eval_loss': 0.6183078289031982, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.034, 'eval_steps_per_second': 11.034, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.69s/it]

{'eval_loss': 0.6169606447219849, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.096, 'eval_steps_per_second': 11.096, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.091, 'train_samples_per_second': 3.316, 'train_steps_per_second': 0.485, 'train_loss': 1.137582778930664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.18it/s]
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 166.41it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20513.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.60 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 281.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.658993124961853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.093, 'eval_steps_per_second': 11.093, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:09,  1.65s/it]

{'eval_loss': 0.6183078289031982, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.225, 'eval_steps_per_second': 11.225, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.68s/it]

{'eval_loss': 0.6169606447219849, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.88, 'eval_steps_per_second': 11.88, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.03s/it]


{'train_runtime': 36.6217, 'train_samples_per_second': 3.359, 'train_steps_per_second': 0.492, 'train_loss': 1.137582778930664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 27274.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 277.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.658993124961853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.314, 'eval_steps_per_second': 12.314, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.71s/it]

{'eval_loss': 0.6183078289031982, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.037, 'eval_steps_per_second': 11.037, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.70s/it]

{'eval_loss': 0.6169606447219849, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1407, 'eval_samples_per_second': 7.109, 'eval_steps_per_second': 7.109, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.1521, 'train_samples_per_second': 3.311, 'train_steps_per_second': 0.484, 'train_loss': 1.137582778930664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.41it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
100%|██████████| 1/1 [00:00<00:00, 142.51it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 27917.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 290.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6622025966644287, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.48, 'eval_steps_per_second': 11.48, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.71s/it]

{'eval_loss': 0.6204294562339783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.972, 'eval_steps_per_second': 10.972, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.72s/it]

{'eval_loss': 0.6147404909133911, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.254, 'eval_steps_per_second': 12.254, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.9521, 'train_samples_per_second': 3.41, 'train_steps_per_second': 0.487, 'train_loss': 1.1282897525363498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20949.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 287.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6622025966644287, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.854, 'eval_steps_per_second': 10.854, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.73s/it]

{'eval_loss': 0.6204294562339783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.816, 'eval_steps_per_second': 11.816, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.73s/it]

{'eval_loss': 0.6147404909133911, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.103, 'eval_steps_per_second': 12.103, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.6515, 'train_samples_per_second': 3.346, 'train_steps_per_second': 0.478, 'train_loss': 1.1282897525363498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.45it/s]
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
100%|██████████| 1/1 [00:00<00:00, 153.31it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20949.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 276.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 322.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6622025966644287, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.296, 'eval_steps_per_second': 11.296, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.66s/it]

{'eval_loss': 0.6204294562339783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.48, 'eval_steps_per_second': 11.48, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.73s/it]

{'eval_loss': 0.6147404909133911, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.329, 'eval_steps_per_second': 12.329, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.01s/it]


{'train_runtime': 36.2036, 'train_samples_per_second': 3.48, 'train_steps_per_second': 0.497, 'train_loss': 1.1282897525363498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 153.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20946.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 267.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 242.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6622025966644287, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.032, 'eval_steps_per_second': 11.032, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.70s/it]

{'eval_loss': 0.6204294562339783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.74s/it]

{'eval_loss': 0.6147404909133911, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.183, 'eval_steps_per_second': 12.183, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.04s/it]


{'train_runtime': 36.7616, 'train_samples_per_second': 3.427, 'train_steps_per_second': 0.49, 'train_loss': 1.1282897525363498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.14it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
100%|██████████| 1/1 [00:00<00:00, 153.47it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20996.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 227.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.6622025966644287, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.428, 'eval_steps_per_second': 11.428, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.6204294562339783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1424, 'eval_samples_per_second': 7.023, 'eval_steps_per_second': 7.023, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.75s/it]

{'eval_loss': 0.6147404909133911, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.41, 'eval_steps_per_second': 11.41, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.6755, 'train_samples_per_second': 3.344, 'train_steps_per_second': 0.478, 'train_loss': 1.1282897525363498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.41it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 14264.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 243.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6753018498420715, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0972, 'eval_samples_per_second': 10.293, 'eval_steps_per_second': 10.293, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.87s/it]

{'eval_loss': 0.6289864182472229, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1047, 'eval_samples_per_second': 9.552, 'eval_steps_per_second': 9.552, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:39<00:00,  2.07s/it]

{'eval_loss': 0.6059880256652832, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.615, 'eval_steps_per_second': 11.615, 'epoch': 3.0}


100%|██████████| 18/18 [00:40<00:00,  2.27s/it]


{'train_runtime': 40.8414, 'train_samples_per_second': 3.159, 'train_steps_per_second': 0.441, 'train_loss': 1.1117948955959744, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
100%|██████████| 1/1 [00:00<00:00, 133.03it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 28564.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.13 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 287.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6753018498420715, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.957, 'eval_steps_per_second': 11.957, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:26<00:11,  1.92s/it]

{'eval_loss': 0.6289864182472229, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.891, 'eval_steps_per_second': 11.891, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:39<00:00,  1.89s/it]

{'eval_loss': 0.6059880256652832, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.176, 'eval_steps_per_second': 12.176, 'epoch': 3.0}


100%|██████████| 18/18 [00:40<00:00,  2.28s/it]


{'train_runtime': 40.9788, 'train_samples_per_second': 3.148, 'train_steps_per_second': 0.439, 'train_loss': 1.1117948955959744, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21493.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1992.54 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 278.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 308.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6753018498420715, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.617, 'eval_steps_per_second': 11.617, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.78s/it]

{'eval_loss': 0.6289864182472229, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.961, 'eval_steps_per_second': 11.961, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.77s/it]

{'eval_loss': 0.6059880256652832, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.676, 'eval_steps_per_second': 11.676, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.6521, 'train_samples_per_second': 3.426, 'train_steps_per_second': 0.478, 'train_loss': 1.1117948955959744, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 17137.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 287.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6753018498420715, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.219, 'eval_steps_per_second': 11.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.73s/it]

{'eval_loss': 0.6289864182472229, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1242, 'eval_samples_per_second': 8.052, 'eval_steps_per_second': 8.052, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.71s/it]

{'eval_loss': 0.6059880256652832, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1057, 'eval_samples_per_second': 9.463, 'eval_steps_per_second': 9.463, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.9371, 'train_samples_per_second': 3.492, 'train_steps_per_second': 0.487, 'train_loss': 1.1117948955959744, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.40it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21481.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 398.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 282.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6753018498420715, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.344, 'eval_steps_per_second': 11.344, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.88s/it]

{'eval_loss': 0.6289864182472229, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0912, 'eval_samples_per_second': 10.97, 'eval_steps_per_second': 10.97, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  1.87s/it]

{'eval_loss': 0.6059880256652832, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.736, 'eval_steps_per_second': 10.736, 'epoch': 3.0}


100%|██████████| 18/18 [00:39<00:00,  2.17s/it]


{'train_runtime': 39.0538, 'train_samples_per_second': 3.303, 'train_steps_per_second': 0.461, 'train_loss': 1.1117948955959744, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 134.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.76it/s]
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 43961.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 238.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6720958948135376, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.414, 'eval_steps_per_second': 11.414, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.76s/it]

{'eval_loss': 0.6263540387153625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.78s/it]

{'eval_loss': 0.6088177561759949, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0887, 'eval_samples_per_second': 11.28, 'eval_steps_per_second': 11.28, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


{'train_runtime': 37.3653, 'train_samples_per_second': 3.533, 'train_steps_per_second': 0.482, 'train_loss': 1.1129065619574652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21941.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 272.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6720958948135376, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.68, 'eval_steps_per_second': 11.68, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.84s/it]

{'eval_loss': 0.6263540387153625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  1.83s/it]

{'eval_loss': 0.6088177561759949, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.801, 'eval_steps_per_second': 10.801, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.14s/it]


{'train_runtime': 38.4439, 'train_samples_per_second': 3.434, 'train_steps_per_second': 0.468, 'train_loss': 1.1129065619574652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 43763.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 261.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 296.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6720958948135376, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.221, 'eval_steps_per_second': 11.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.78s/it]

{'eval_loss': 0.6263540387153625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.107, 'eval_steps_per_second': 12.107, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.82s/it]

{'eval_loss': 0.6088177561759949, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


{'train_runtime': 37.4003, 'train_samples_per_second': 3.529, 'train_steps_per_second': 0.481, 'train_loss': 1.1129065619574652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21988.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 268.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6720958948135376, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0943, 'eval_samples_per_second': 10.608, 'eval_steps_per_second': 10.608, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.79s/it]

{'eval_loss': 0.6263540387153625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.485, 'eval_steps_per_second': 12.485, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.80s/it]

{'eval_loss': 0.6088177561759949, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.1, 'eval_steps_per_second': 11.1, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2198, 'train_samples_per_second': 3.546, 'train_steps_per_second': 0.484, 'train_loss': 1.1129065619574652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.89it/s]
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21970.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 270.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6720958948135376, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.399, 'eval_steps_per_second': 12.399, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.76s/it]

{'eval_loss': 0.6263540387153625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.484, 'eval_steps_per_second': 11.484, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.80s/it]

{'eval_loss': 0.6088177561759949, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.479, 'eval_steps_per_second': 11.479, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2035, 'train_samples_per_second': 3.548, 'train_steps_per_second': 0.484, 'train_loss': 1.1129065619574652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.60it/s]
100%|██████████| 1/1 [00:00<00:00, 153.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.70it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22498.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 399.04 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 274.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6790259480476379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.98, 'eval_steps_per_second': 11.98, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.80s/it]

{'eval_loss': 0.6285935044288635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.75, 'eval_steps_per_second': 11.75, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.83s/it]

{'eval_loss': 0.6056214570999146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.958, 'eval_steps_per_second': 11.958, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.1975, 'train_samples_per_second': 3.629, 'train_steps_per_second': 0.484, 'train_loss': 1.1075258255004883, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22517.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 268.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6790259480476379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.682, 'eval_steps_per_second': 11.682, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.83s/it]

{'eval_loss': 0.6285935044288635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 11.611, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.82s/it]

{'eval_loss': 0.6056214570999146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.889, 'eval_steps_per_second': 11.889, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2537, 'train_samples_per_second': 3.624, 'train_steps_per_second': 0.483, 'train_loss': 1.1075258255004883, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.66it/s]
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.39it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 17963.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 399.19 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 279.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.6790259480476379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0912, 'eval_samples_per_second': 10.97, 'eval_steps_per_second': 10.97, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.82s/it]

{'eval_loss': 0.6285935044288635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.817, 'eval_steps_per_second': 11.817, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.82s/it]

{'eval_loss': 0.6056214570999146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.031, 'eval_steps_per_second': 12.031, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2259, 'train_samples_per_second': 3.627, 'train_steps_per_second': 0.484, 'train_loss': 1.1075258255004883, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 142.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22450.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 287.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6790259480476379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.346, 'eval_steps_per_second': 11.346, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.83s/it]

{'eval_loss': 0.6285935044288635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.641, 'eval_steps_per_second': 12.641, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.83s/it]

{'eval_loss': 0.6056214570999146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.383, 'eval_steps_per_second': 11.383, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


{'train_runtime': 37.4584, 'train_samples_per_second': 3.604, 'train_steps_per_second': 0.481, 'train_loss': 1.1075258255004883, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.56it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22474.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 270.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6790259480476379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1042, 'eval_samples_per_second': 9.601, 'eval_steps_per_second': 9.601, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.83s/it]

{'eval_loss': 0.6285935044288635, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.819, 'eval_steps_per_second': 11.819, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.82s/it]

{'eval_loss': 0.6056214570999146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2676, 'train_samples_per_second': 3.622, 'train_steps_per_second': 0.483, 'train_loss': 1.1075258255004883, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.75it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 45992.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 262.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6828042268753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.481, 'eval_steps_per_second': 11.481, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.85s/it]

{'eval_loss': 0.6304429173469543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1302, 'eval_samples_per_second': 7.682, 'eval_steps_per_second': 7.682, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.86s/it]

{'eval_loss': 0.6039950251579285, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.64, 'eval_steps_per_second': 12.64, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.5775, 'train_samples_per_second': 3.672, 'train_steps_per_second': 0.479, 'train_loss': 1.1052476035224066, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 181.56it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22998.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.02 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 283.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6828042268753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.031, 'eval_steps_per_second': 12.031, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.85s/it]

{'eval_loss': 0.6304429173469543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.893, 'eval_steps_per_second': 11.893, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.88s/it]

{'eval_loss': 0.6039950251579285, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.562, 'eval_steps_per_second': 12.562, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


{'train_runtime': 37.4655, 'train_samples_per_second': 3.683, 'train_steps_per_second': 0.48, 'train_loss': 1.1052476035224066, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
100%|██████████| 1/1 [00:00<00:00, 142.73it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22985.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 251.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6828042268753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.963, 'eval_steps_per_second': 11.963, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.89s/it]

{'eval_loss': 0.6304429173469543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.87s/it]

{'eval_loss': 0.6039950251579285, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.7312, 'train_samples_per_second': 3.657, 'train_steps_per_second': 0.477, 'train_loss': 1.1052476035224066, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.55it/s]
100%|██████████| 1/1 [00:00<00:00, 153.78it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 23001.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 262.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6828042268753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.816, 'eval_steps_per_second': 11.816, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.85s/it]

{'eval_loss': 0.6304429173469543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.889, 'eval_steps_per_second': 11.889, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.85s/it]

{'eval_loss': 0.6039950251579285, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.561, 'eval_steps_per_second': 12.561, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


{'train_runtime': 37.3546, 'train_samples_per_second': 3.694, 'train_steps_per_second': 0.482, 'train_loss': 1.1052476035224066, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.41it/s]
100%|██████████| 1/1 [00:00<00:00, 153.42it/s]
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 18392.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.76 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 258.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6828042268753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.641, 'eval_steps_per_second': 12.641, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.91s/it]

{'eval_loss': 0.6304429173469543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.561, 'eval_steps_per_second': 12.561, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.88s/it]

{'eval_loss': 0.6039950251579285, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.33, 'eval_steps_per_second': 12.33, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.8741, 'train_samples_per_second': 3.644, 'train_steps_per_second': 0.475, 'train_loss': 1.1052476035224066, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23443.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 279.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 312.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6816064715385437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.973, 'eval_steps_per_second': 10.973, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.91s/it]

{'eval_loss': 0.6302164196968079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.891, 'eval_steps_per_second': 11.891, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.92s/it]

{'eval_loss': 0.6057775616645813, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.312, 'eval_steps_per_second': 12.312, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.11s/it]


{'train_runtime': 37.897, 'train_samples_per_second': 3.721, 'train_steps_per_second': 0.475, 'train_loss': 1.1034667756822374, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 117.42it/s]
100%|██████████| 1/1 [00:00<00:00, 135.52it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23476.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 399.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 265.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6816064715385437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.411, 'eval_steps_per_second': 11.411, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.91s/it]

{'eval_loss': 0.6302164196968079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.562, 'eval_steps_per_second': 12.562, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.94s/it]

{'eval_loss': 0.6057775616645813, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.885, 'eval_steps_per_second': 12.885, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.12s/it]


{'train_runtime': 38.116, 'train_samples_per_second': 3.699, 'train_steps_per_second': 0.472, 'train_loss': 1.1034667756822374, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 153.66it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 18769.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.14 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 261.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6816064715385437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.328, 'eval_steps_per_second': 12.328, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:12,  2.05s/it]

{'eval_loss': 0.6302164196968079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0883, 'eval_samples_per_second': 11.331, 'eval_steps_per_second': 11.331, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:38<00:00,  2.01s/it]

{'eval_loss': 0.6057775616645813, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.541, 'eval_steps_per_second': 11.541, 'epoch': 3.0}


100%|██████████| 18/18 [00:39<00:00,  2.21s/it]


{'train_runtime': 39.6981, 'train_samples_per_second': 3.552, 'train_steps_per_second': 0.453, 'train_loss': 1.1034667756822374, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.54it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 21081.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 265.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6816064715385437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.956, 'eval_steps_per_second': 11.956, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.96s/it]

{'eval_loss': 0.6302164196968079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  1.98s/it]

{'eval_loss': 0.6057775616645813, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.14s/it]


{'train_runtime': 38.5066, 'train_samples_per_second': 3.662, 'train_steps_per_second': 0.467, 'train_loss': 1.1034667756822374, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.80it/s]
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23496.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 258.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6816064715385437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.99s/it]

{'eval_loss': 0.6302164196968079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0928, 'eval_samples_per_second': 10.778, 'eval_steps_per_second': 10.778, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:38<00:00,  1.97s/it]

{'eval_loss': 0.6057775616645813, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 3.0}


100%|██████████| 18/18 [00:39<00:00,  2.19s/it]


{'train_runtime': 39.4138, 'train_samples_per_second': 3.577, 'train_steps_per_second': 0.457, 'train_loss': 1.1034667756822374, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 23936.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 256.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6847342252731323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.745, 'eval_steps_per_second': 11.745, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.97s/it]

{'eval_loss': 0.6317565441131592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.809, 'eval_steps_per_second': 11.809, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  2.00s/it]

{'eval_loss': 0.6032462120056152, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.271, 'eval_steps_per_second': 12.271, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.16s/it]


{'train_runtime': 38.8304, 'train_samples_per_second': 3.708, 'train_steps_per_second': 0.464, 'train_loss': 1.1019310421413846, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 19172.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 662.08 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 242.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6847342252731323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.615, 'eval_steps_per_second': 11.615, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:12,  2.04s/it]

{'eval_loss': 0.6317565441131592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0954, 'eval_samples_per_second': 10.481, 'eval_steps_per_second': 10.481, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  1.98s/it]

{'eval_loss': 0.6032462120056152, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.641, 'eval_steps_per_second': 12.641, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.15s/it]


{'train_runtime': 38.7832, 'train_samples_per_second': 3.713, 'train_steps_per_second': 0.464, 'train_loss': 1.1019310421413846, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.46it/s]
100%|██████████| 1/1 [00:00<00:00, 166.33it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 23996.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 992.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.92 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 256.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6847342252731323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.254, 'eval_steps_per_second': 11.254, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:12,  2.07s/it]

{'eval_loss': 0.6317565441131592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.608, 'eval_steps_per_second': 11.608, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  2.01s/it]

{'eval_loss': 0.6032462120056152, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.947, 'eval_steps_per_second': 11.947, 'epoch': 3.0}


100%|██████████| 18/18 [00:39<00:00,  2.18s/it]


{'train_runtime': 39.3015, 'train_samples_per_second': 3.664, 'train_steps_per_second': 0.458, 'train_loss': 1.1019310421413846, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.53it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.43it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 23973.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 242.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6847342252731323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.96s/it]

{'eval_loss': 0.6317565441131592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.611, 'eval_steps_per_second': 11.611, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  1.96s/it]

{'eval_loss': 0.6032462120056152, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.752, 'eval_steps_per_second': 11.752, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.16s/it]


{'train_runtime': 38.9391, 'train_samples_per_second': 3.698, 'train_steps_per_second': 0.462, 'train_loss': 1.1019310421413846, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.49it/s]
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 19135.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.84 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 255.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 276.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6847342252731323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.61, 'eval_steps_per_second': 11.61, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.95s/it]

{'eval_loss': 0.6317565441131592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.257, 'eval_steps_per_second': 12.257, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  2.01s/it]

{'eval_loss': 0.6032462120056152, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.885, 'eval_steps_per_second': 12.885, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.14s/it]


{'train_runtime': 38.4975, 'train_samples_per_second': 3.741, 'train_steps_per_second': 0.468, 'train_loss': 1.1019310421413846, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24423.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.79 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 252.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.62007737159729, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.096, 'eval_steps_per_second': 11.096, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.53s/it]

{'eval_loss': 0.5598915219306946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.53s/it]

{'eval_loss': 0.5251340270042419, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.536, 'eval_steps_per_second': 11.536, 'epoch': 3.0}


100%|██████████| 21/21 [00:39<00:00,  1.90s/it]


{'train_runtime': 39.8932, 'train_samples_per_second': 3.685, 'train_steps_per_second': 0.526, 'train_loss': 1.006315412975493, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.74it/s]
100%|██████████| 1/1 [00:00<00:00, 181.65it/s]
100%|██████████| 1/1 [00:00<00:00, 181.62it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 19584.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.45 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 261.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 281.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8351277112960815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.181, 'eval_steps_per_second': 12.181, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.55s/it]

{'eval_loss': 0.7656381130218506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.75, 'eval_steps_per_second': 11.75, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.61s/it]

{'eval_loss': 0.7527693510055542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0897, 'eval_samples_per_second': 11.146, 'eval_steps_per_second': 11.146, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.95s/it]


{'train_runtime': 40.9341, 'train_samples_per_second': 3.591, 'train_steps_per_second': 0.513, 'train_loss': 0.7879303523472377, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 76.79it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24498.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 272.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.8351277112960815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0942, 'eval_samples_per_second': 10.619, 'eval_steps_per_second': 10.619, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.58s/it]

{'eval_loss': 0.7656381130218506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.403, 'eval_steps_per_second': 12.403, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.63s/it]

{'eval_loss': 0.7527693510055542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.1, 'eval_steps_per_second': 12.1, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.5479, 'train_samples_per_second': 3.538, 'train_steps_per_second': 0.505, 'train_loss': 0.7879303523472377, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 181.59it/s]
100%|██████████| 1/1 [00:00<00:00, 153.38it/s]
100%|██████████| 1/1 [00:00<00:00, 153.46it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 19523.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.85 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 246.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 226.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8351277112960815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.348, 'eval_steps_per_second': 11.348, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.63s/it]

{'eval_loss': 0.7656381130218506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0877, 'eval_samples_per_second': 11.408, 'eval_steps_per_second': 11.408, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.58s/it]

{'eval_loss': 0.7527693510055542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.559, 'eval_steps_per_second': 11.559, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  2.00s/it]


{'train_runtime': 41.8995, 'train_samples_per_second': 3.508, 'train_steps_per_second': 0.501, 'train_loss': 0.7879303523472377, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24481.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 245.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8351277112960815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.363, 'eval_steps_per_second': 11.363, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.59s/it]

{'eval_loss': 0.7656381130218506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.58s/it]

{'eval_loss': 0.7527693510055542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.133, 'eval_samples_per_second': 7.517, 'eval_steps_per_second': 7.517, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.94s/it]


{'train_runtime': 40.8265, 'train_samples_per_second': 3.601, 'train_steps_per_second': 0.514, 'train_loss': 0.7879303523472377, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 168.41it/s]
100%|██████████| 1/1 [00:00<00:00, 93.44it/s]
100%|██████████| 1/1 [00:00<00:00, 246.43it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 24972.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 147.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 266.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.83821702003479, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.099, 'eval_steps_per_second': 13.099, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.62s/it]

{'eval_loss': 0.766705334186554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.761, 'eval_steps_per_second': 11.761, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.64s/it]

{'eval_loss': 0.7520114779472351, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.013, 'eval_steps_per_second': 12.013, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.95s/it]


{'train_runtime': 40.9262, 'train_samples_per_second': 3.665, 'train_steps_per_second': 0.513, 'train_loss': 0.7851112910679409, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.43it/s]
100%|██████████| 1/1 [00:00<00:00, 165.80it/s]
100%|██████████| 1/1 [00:00<00:00, 154.93it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 144.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 147.70 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 260.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 443.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 348.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.83821702003479, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.033, 'eval_steps_per_second': 13.033, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.61s/it]

{'eval_loss': 0.766705334186554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1319, 'eval_samples_per_second': 7.583, 'eval_steps_per_second': 7.583, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.61s/it]

{'eval_loss': 0.7520114779472351, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.328, 'eval_steps_per_second': 12.328, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.93s/it]


{'train_runtime': 40.6132, 'train_samples_per_second': 3.693, 'train_steps_per_second': 0.517, 'train_loss': 0.7851112910679409, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.17it/s]
100%|██████████| 1/1 [00:00<00:00, 177.04it/s]
100%|██████████| 1/1 [00:00<00:00, 144.60it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 250.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 261.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a 

{'eval_loss': 0.83821702003479, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.295, 'eval_steps_per_second': 11.295, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.61s/it]

{'eval_loss': 0.766705334186554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.295, 'eval_steps_per_second': 12.295, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.62s/it]

{'eval_loss': 0.7520114779472351, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0794, 'eval_samples_per_second': 12.6, 'eval_steps_per_second': 12.6, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.94s/it]


{'train_runtime': 40.7289, 'train_samples_per_second': 3.683, 'train_steps_per_second': 0.516, 'train_loss': 0.7851112910679409, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.39it/s]
100%|██████████| 1/1 [00:00<00:00, 327.53it/s]
100%|██████████| 1/1 [00:00<00:00, 105.06it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 25031.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1464.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 984.35 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 248.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 481.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 115.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.83821702003479, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.297, 'eval_steps_per_second': 12.297, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.62s/it]

{'eval_loss': 0.766705334186554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.107, 'eval_steps_per_second': 13.107, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.62s/it]

{'eval_loss': 0.7520114779472351, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.142, 'eval_steps_per_second': 12.142, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.95s/it]


{'train_runtime': 40.9234, 'train_samples_per_second': 3.665, 'train_steps_per_second': 0.513, 'train_loss': 0.7851112910679409, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.59it/s]
100%|██████████| 1/1 [00:00<00:00, 165.28it/s]
100%|██████████| 1/1 [00:00<00:00, 166.25it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 265798.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 269.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 632.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably T

{'eval_loss': 0.83821702003479, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.599, 'eval_steps_per_second': 11.599, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.62s/it]

{'eval_loss': 0.766705334186554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.088, 'eval_steps_per_second': 13.088, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.64s/it]

{'eval_loss': 0.7520114779472351, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1357, 'eval_samples_per_second': 7.371, 'eval_steps_per_second': 7.371, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.96s/it]


{'train_runtime': 41.0921, 'train_samples_per_second': 3.65, 'train_steps_per_second': 0.511, 'train_loss': 0.7851112910679409, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 145.19it/s]
100%|██████████| 1/1 [00:00<00:00, 145.41it/s]
100%|██████████| 1/1 [00:00<00:00, 146.60it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 25269.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 282.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 144.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 340.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.8390868306159973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0902, 'eval_samples_per_second': 11.091, 'eval_steps_per_second': 11.091, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.67s/it]

{'eval_loss': 0.7673991918563843, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.136, 'eval_steps_per_second': 12.136, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.71s/it]

{'eval_loss': 0.7516944408416748, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.81, 'eval_steps_per_second': 11.81, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.5057, 'train_samples_per_second': 3.686, 'train_steps_per_second': 0.506, 'train_loss': 0.7847472599574498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 149.89it/s]
100%|██████████| 1/1 [00:00<00:00, 93.34it/s]
100%|██████████| 1/1 [00:00<00:00, 143.94it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 25489.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 521.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 236.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 123.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.8390868306159973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.941, 'eval_steps_per_second': 12.941, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:11,  1.71s/it]

{'eval_loss': 0.7673991918563843, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1269, 'eval_samples_per_second': 7.879, 'eval_steps_per_second': 7.879, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.72s/it]

{'eval_loss': 0.7516944408416748, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1024, 'eval_samples_per_second': 9.77, 'eval_steps_per_second': 9.77, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.03s/it]


{'train_runtime': 42.583, 'train_samples_per_second': 3.593, 'train_steps_per_second': 0.493, 'train_loss': 0.7847472599574498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 308.79it/s]
100%|██████████| 1/1 [00:00<00:00, 374.09it/s]
100%|██████████| 1/1 [00:00<00:00, 110.80it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 254.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 341.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 141.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 453.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.8390868306159973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.11, 'eval_steps_per_second': 13.11, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.67s/it]

{'eval_loss': 0.7673991918563843, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1289, 'eval_samples_per_second': 7.761, 'eval_steps_per_second': 7.761, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.68s/it]

{'eval_loss': 0.7516944408416748, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1347, 'eval_samples_per_second': 7.425, 'eval_steps_per_second': 7.425, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.4798, 'train_samples_per_second': 3.689, 'train_steps_per_second': 0.506, 'train_loss': 0.7847472599574498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 216.82it/s]
100%|██████████| 1/1 [00:00<00:00, 156.98it/s]
100%|██████████| 1/1 [00:00<00:00, 176.37it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 140.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.80 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 276.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 345.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 142.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mod

{'eval_loss': 0.8390868306159973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.734, 'eval_steps_per_second': 11.734, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.68s/it]

{'eval_loss': 0.7673991918563843, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1319, 'eval_samples_per_second': 7.581, 'eval_steps_per_second': 7.581, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.70s/it]

{'eval_loss': 0.7516944408416748, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.973, 'eval_steps_per_second': 11.973, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.99s/it]


{'train_runtime': 41.707, 'train_samples_per_second': 3.668, 'train_steps_per_second': 0.504, 'train_loss': 0.7847472599574498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 274.05it/s]
100%|██████████| 1/1 [00:00<00:00, 112.70it/s]
100%|██████████| 1/1 [00:00<00:00, 481.77it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 509.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 243.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 355.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 469.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.8390868306159973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1241, 'eval_samples_per_second': 8.058, 'eval_steps_per_second': 8.058, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.68s/it]

{'eval_loss': 0.7673991918563843, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1242, 'eval_samples_per_second': 8.054, 'eval_steps_per_second': 8.054, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.68s/it]

{'eval_loss': 0.7516944408416748, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0903, 'eval_samples_per_second': 11.079, 'eval_steps_per_second': 11.079, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.6686, 'train_samples_per_second': 3.672, 'train_steps_per_second': 0.504, 'train_loss': 0.7847472599574498, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
100%|██████████| 1/1 [00:00<00:00, 166.18it/s]
100%|██████████| 1/1 [00:00<00:00, 2824.45it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 26001.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 894.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 285.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 539.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably T

{'eval_loss': 0.8370981216430664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.516, 'eval_steps_per_second': 12.516, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.75s/it]

{'eval_loss': 0.7647171020507812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.583, 'eval_steps_per_second': 11.583, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.76s/it]

{'eval_loss': 0.7486385703086853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.993, 'eval_steps_per_second': 11.993, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.02s/it]


{'train_runtime': 42.4601, 'train_samples_per_second': 3.674, 'train_steps_per_second': 0.495, 'train_loss': 0.7834093003045945, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.52it/s]
100%|██████████| 1/1 [00:00<00:00, 4387.35it/s]
100%|██████████| 1/1 [00:00<00:00, 90.68it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 267.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 123.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 357.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 354.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a do

{'eval_loss': 0.8370981216430664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.512, 'eval_steps_per_second': 12.512, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.71s/it]

{'eval_loss': 0.7647171020507812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.603, 'eval_steps_per_second': 11.603, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.72s/it]

{'eval_loss': 0.7486385703086853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.996, 'eval_steps_per_second': 11.996, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.99s/it]


{'train_runtime': 41.7627, 'train_samples_per_second': 3.735, 'train_steps_per_second': 0.503, 'train_loss': 0.7834093003045945, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.71it/s]
100%|██████████| 1/1 [00:00<00:00, 110.87it/s]
100%|██████████| 1/1 [00:00<00:00, 114.48it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 25995.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 889.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 270.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.8370981216430664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.915, 'eval_steps_per_second': 11.915, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.75s/it]

{'eval_loss': 0.7647171020507812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.75s/it]

{'eval_loss': 0.7486385703086853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0761, 'eval_samples_per_second': 13.137, 'eval_steps_per_second': 13.137, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.02s/it]


{'train_runtime': 42.332, 'train_samples_per_second': 3.685, 'train_steps_per_second': 0.496, 'train_loss': 0.7834093003045945, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.42it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 245.15it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 272.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 241.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a 

{'eval_loss': 0.8370981216430664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.929, 'eval_steps_per_second': 12.929, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.74s/it]

{'eval_loss': 0.7647171020507812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.079, 'eval_samples_per_second': 12.656, 'eval_steps_per_second': 12.656, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.73s/it]

{'eval_loss': 0.7486385703086853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.707, 'eval_steps_per_second': 11.707, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.00s/it]


{'train_runtime': 42.0831, 'train_samples_per_second': 3.707, 'train_steps_per_second': 0.499, 'train_loss': 0.7834093003045945, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 3172.70it/s]
100%|██████████| 1/1 [00:00<00:00, 456.30it/s]
100%|██████████| 1/1 [00:00<00:00, 111.33it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 7453.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 884.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 256.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 547.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.8370981216430664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1267, 'eval_samples_per_second': 7.89, 'eval_steps_per_second': 7.89, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.74s/it]

{'eval_loss': 0.7647171020507812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.702, 'eval_steps_per_second': 11.702, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.76s/it]

{'eval_loss': 0.7486385703086853, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.284, 'eval_steps_per_second': 13.284, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.01s/it]


{'train_runtime': 42.1464, 'train_samples_per_second': 3.701, 'train_steps_per_second': 0.498, 'train_loss': 0.7834093003045945, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 243.35it/s]
100%|██████████| 1/1 [00:00<00:00, 156.81it/s]
100%|██████████| 1/1 [00:00<00:00, 144.28it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26514.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 142.79 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 254.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 352.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 113.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.8360763788223267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.167, 'eval_steps_per_second': 12.167, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.76s/it]

{'eval_loss': 0.7663393020629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0772, 'eval_samples_per_second': 12.961, 'eval_steps_per_second': 12.961, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.78s/it]

{'eval_loss': 0.7505645155906677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.287, 'eval_steps_per_second': 13.287, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.01s/it]


{'train_runtime': 42.1645, 'train_samples_per_second': 3.771, 'train_steps_per_second': 0.498, 'train_loss': 0.7820578075590587, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 374.99it/s]
100%|██████████| 1/1 [00:00<00:00, 212.98it/s]
100%|██████████| 1/1 [00:00<00:00, 325.09it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 253.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 253.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 496.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 341.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.8360763788223267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.984, 'eval_steps_per_second': 11.984, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.80s/it]

{'eval_loss': 0.7663393020629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.076, 'eval_steps_per_second': 13.076, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.80s/it]

{'eval_loss': 0.7505645155906677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1037, 'eval_samples_per_second': 9.639, 'eval_steps_per_second': 9.639, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.8281, 'train_samples_per_second': 3.713, 'train_steps_per_second': 0.49, 'train_loss': 0.7820578075590587, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.32it/s]
100%|██████████| 1/1 [00:00<00:00, 169.04it/s]
100%|██████████| 1/1 [00:00<00:00, 84.53it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26301.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1197.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 246.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.8360763788223267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.067, 'eval_steps_per_second': 13.067, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.79s/it]

{'eval_loss': 0.7663393020629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1188, 'eval_samples_per_second': 8.416, 'eval_steps_per_second': 8.416, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.78s/it]

{'eval_loss': 0.7505645155906677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.125, 'eval_steps_per_second': 12.125, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.01s/it]


{'train_runtime': 42.3045, 'train_samples_per_second': 3.758, 'train_steps_per_second': 0.496, 'train_loss': 0.7820578075590587, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 85.89it/s]
100%|██████████| 1/1 [00:00<00:00, 99.58it/s]
100%|██████████| 1/1 [00:00<00:00, 170.96it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26208.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 272.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 349.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 458.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 150.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.8360763788223267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.315, 'eval_steps_per_second': 12.315, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.81s/it]

{'eval_loss': 0.7663393020629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1143, 'eval_samples_per_second': 8.752, 'eval_steps_per_second': 8.752, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.80s/it]

{'eval_loss': 0.7505645155906677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1142, 'eval_samples_per_second': 8.753, 'eval_steps_per_second': 8.753, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.7674, 'train_samples_per_second': 3.718, 'train_steps_per_second': 0.491, 'train_loss': 0.7820578075590587, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 268.38it/s]
100%|██████████| 1/1 [00:00<00:00, 167.61it/s]
100%|██████████| 1/1 [00:00<00:00, 167.12it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26498.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.14 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 260.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 123.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1305.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.8360763788223267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.093, 'eval_steps_per_second': 13.093, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.79s/it]

{'eval_loss': 0.7663393020629883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.333, 'eval_steps_per_second': 11.333, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.78s/it]

{'eval_loss': 0.7505645155906677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1071, 'eval_samples_per_second': 9.338, 'eval_steps_per_second': 9.338, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.02s/it]


{'train_runtime': 42.3837, 'train_samples_per_second': 3.751, 'train_steps_per_second': 0.495, 'train_loss': 0.7820578075590587, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.83it/s]
100%|██████████| 1/1 [00:00<00:00, 84.84it/s]
100%|██████████| 1/1 [00:00<00:00, 451.63it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 7735.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1093.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 883.20 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 267.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 546.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 343.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.8360660076141357, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.415, 'eval_steps_per_second': 12.415, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.86s/it]

{'eval_loss': 0.7651806473731995, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0711, 'eval_samples_per_second': 14.074, 'eval_steps_per_second': 14.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.87s/it]

{'eval_loss': 0.7492283582687378, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.984, 'eval_steps_per_second': 11.984, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.467, 'train_samples_per_second': 3.727, 'train_steps_per_second': 0.483, 'train_loss': 0.7812480018252418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.92it/s]
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
100%|██████████| 1/1 [00:00<00:00, 147.85it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 494.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 255.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 145.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 382.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN 

{'eval_loss': 0.8360660076141357, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.161, 'eval_steps_per_second': 12.161, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.85s/it]

{'eval_loss': 0.7651806473731995, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0766, 'eval_samples_per_second': 13.056, 'eval_steps_per_second': 13.056, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.85s/it]

{'eval_loss': 0.7492283582687378, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.172, 'eval_steps_per_second': 12.172, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.05s/it]


{'train_runtime': 43.0243, 'train_samples_per_second': 3.765, 'train_steps_per_second': 0.488, 'train_loss': 0.7812480018252418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.69it/s]
100%|██████████| 1/1 [00:00<00:00, 135.90it/s]
100%|██████████| 1/1 [00:00<00:00, 170.92it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 26838.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 270.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.8360660076141357, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0766, 'eval_samples_per_second': 13.06, 'eval_steps_per_second': 13.06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.85s/it]

{'eval_loss': 0.7651806473731995, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.153, 'eval_steps_per_second': 12.153, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.87s/it]

{'eval_loss': 0.7492283582687378, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.964, 'eval_steps_per_second': 11.964, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.4219, 'train_samples_per_second': 3.731, 'train_steps_per_second': 0.484, 'train_loss': 0.7812480018252418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.24it/s]
100%|██████████| 1/1 [00:00<00:00, 267.46it/s]
100%|██████████| 1/1 [00:00<00:00, 143.83it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 26683.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 246.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 485.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.8360660076141357, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0792, 'eval_samples_per_second': 12.63, 'eval_steps_per_second': 12.63, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.83s/it]

{'eval_loss': 0.7651806473731995, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.7492283582687378, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.709, 'eval_steps_per_second': 11.709, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.8143, 'train_samples_per_second': 3.784, 'train_steps_per_second': 0.49, 'train_loss': 0.7812480018252418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 85.71it/s]
100%|██████████| 1/1 [00:00<00:00, 144.25it/s]
100%|██████████| 1/1 [00:00<00:00, 247.70it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 27011.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 868.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.01 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 258.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 113.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.8360660076141357, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.572, 'eval_steps_per_second': 11.572, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.82s/it]

{'eval_loss': 0.7651806473731995, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.305, 'eval_steps_per_second': 12.305, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.85s/it]

{'eval_loss': 0.7492283582687378, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.573, 'eval_steps_per_second': 11.573, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.949, 'train_samples_per_second': 3.772, 'train_steps_per_second': 0.489, 'train_loss': 0.7812480018252418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.52it/s]
100%|██████████| 1/1 [00:00<00:00, 114.81it/s]
100%|██████████| 1/1 [00:00<00:00, 151.88it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 27177.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 244.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should p

{'eval_loss': 0.8358782529830933, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.341, 'eval_steps_per_second': 11.341, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.90s/it]

{'eval_loss': 0.7645866870880127, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.90s/it]

{'eval_loss': 0.7487874031066895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.715, 'eval_steps_per_second': 11.715, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.08s/it]


{'train_runtime': 43.7007, 'train_samples_per_second': 3.776, 'train_steps_per_second': 0.481, 'train_loss': 0.7811109452020555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 194.50it/s]
100%|██████████| 1/1 [00:00<00:00, 132.04it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 55017.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1232.89 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 277.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.8358782529830933, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.1, 'eval_steps_per_second': 11.1, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.92s/it]

{'eval_loss': 0.7645866870880127, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.271, 'eval_steps_per_second': 12.271, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.92s/it]

{'eval_loss': 0.7487874031066895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.12, 'eval_steps_per_second': 12.12, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.10s/it]


{'train_runtime': 43.9978, 'train_samples_per_second': 3.75, 'train_steps_per_second': 0.477, 'train_loss': 0.7811109452020555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.80it/s]
100%|██████████| 1/1 [00:00<00:00, 213.66it/s]
100%|██████████| 1/1 [00:00<00:00, 148.47it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 27171.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 251.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 142.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 502.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this 

{'eval_loss': 0.8358782529830933, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.57, 'eval_steps_per_second': 11.57, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.91s/it]

{'eval_loss': 0.7645866870880127, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0955, 'eval_samples_per_second': 10.468, 'eval_steps_per_second': 10.468, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.90s/it]

{'eval_loss': 0.7487874031066895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.436, 'eval_steps_per_second': 12.436, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.08s/it]


{'train_runtime': 43.6546, 'train_samples_per_second': 3.78, 'train_steps_per_second': 0.481, 'train_loss': 0.7811109452020555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 146.43it/s]
100%|██████████| 1/1 [00:00<00:00, 2490.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.17it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 29383.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 512.13 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 260.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 462.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 144.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 254.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should p

{'eval_loss': 0.8358782529830933, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.724, 'eval_steps_per_second': 11.724, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.94s/it]

{'eval_loss': 0.7645866870880127, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.921, 'eval_steps_per_second': 12.921, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:43<00:00,  1.93s/it]

{'eval_loss': 0.7487874031066895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.089, 'eval_steps_per_second': 13.089, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.11s/it]


{'train_runtime': 44.2726, 'train_samples_per_second': 3.727, 'train_steps_per_second': 0.474, 'train_loss': 0.7811109452020555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 174.15it/s]
100%|██████████| 1/1 [00:00<00:00, 215.61it/s]
100%|██████████| 1/1 [00:00<00:00, 465.41it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 255.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1345.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 337.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.8358782529830933, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.217, 'eval_steps_per_second': 13.217, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.91s/it]

{'eval_loss': 0.7645866870880127, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.183, 'eval_steps_per_second': 12.183, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.91s/it]

{'eval_loss': 0.7487874031066895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.0, 'eval_steps_per_second': 12.0, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.08s/it]


{'train_runtime': 43.7131, 'train_samples_per_second': 3.775, 'train_steps_per_second': 0.48, 'train_loss': 0.7811109452020555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 198.75it/s]
100%|██████████| 1/1 [00:00<00:00, 151.08it/s]
100%|██████████| 1/1 [00:00<00:00, 159.30it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 29833.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 140.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 261.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 769.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.8353980183601379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.968, 'eval_steps_per_second': 11.968, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.96s/it]

{'eval_loss': 0.7642039656639099, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.673, 'eval_steps_per_second': 11.673, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.95s/it]

{'eval_loss': 0.7482785582542419, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.13, 'eval_steps_per_second': 12.13, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.10s/it]


{'train_runtime': 44.1645, 'train_samples_per_second': 3.804, 'train_steps_per_second': 0.475, 'train_loss': 0.7806986854189918, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 88.80it/s]
100%|██████████| 1/1 [00:00<00:00, 166.27it/s]
100%|██████████| 1/1 [00:00<00:00, 145.34it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 147.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 918.80 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 248.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 266.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 450.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should proba

{'eval_loss': 0.8353980183601379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.085, 'eval_steps_per_second': 13.085, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.95s/it]

{'eval_loss': 0.7642039656639099, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.092, 'eval_steps_per_second': 13.092, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.95s/it]

{'eval_loss': 0.7482785582542419, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.454, 'eval_steps_per_second': 12.454, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.09s/it]


{'train_runtime': 43.9561, 'train_samples_per_second': 3.822, 'train_steps_per_second': 0.478, 'train_loss': 0.7806986854189918, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 115.23it/s]
100%|██████████| 1/1 [00:00<00:00, 94.55it/s]
100%|██████████| 1/1 [00:00<00:00, 197.37it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 28005.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 978.38 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 258.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 144.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 127.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 368.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.8353980183601379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.083, 'eval_steps_per_second': 13.083, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.97s/it]

{'eval_loss': 0.7642039656639099, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.0, 'eval_steps_per_second': 12.0, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:43<00:00,  1.99s/it]

{'eval_loss': 0.7482785582542419, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.218, 'eval_steps_per_second': 13.218, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.13s/it]


{'train_runtime': 44.6803, 'train_samples_per_second': 3.76, 'train_steps_per_second': 0.47, 'train_loss': 0.7806986854189918, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 240.57it/s]
100%|██████████| 1/1 [00:00<00:00, 599.10it/s]
100%|██████████| 1/1 [00:00<00:00, 103.72it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 28005.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 238.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 311.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 147.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 144.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.8353980183601379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.96s/it]

{'eval_loss': 0.7642039656639099, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.286, 'eval_steps_per_second': 13.286, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.96s/it]

{'eval_loss': 0.7482785582542419, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.661, 'eval_steps_per_second': 11.661, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.10s/it]


{'train_runtime': 44.1456, 'train_samples_per_second': 3.806, 'train_steps_per_second': 0.476, 'train_loss': 0.7806986854189918, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.66it/s]
100%|██████████| 1/1 [00:00<00:00, 147.49it/s]
100%|██████████| 1/1 [00:00<00:00, 315.98it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 519.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 261.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 4963.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 340.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.8353980183601379, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.883, 'eval_steps_per_second': 11.883, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.97s/it]

{'eval_loss': 0.7642039656639099, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.459, 'eval_steps_per_second': 12.459, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:43<00:00,  2.01s/it]

{'eval_loss': 0.7482785582542419, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.993, 'eval_steps_per_second': 11.993, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.13s/it]


{'train_runtime': 44.6459, 'train_samples_per_second': 3.763, 'train_steps_per_second': 0.47, 'train_loss': 0.7806986854189918, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 152.11it/s]
100%|██████████| 1/1 [00:00<00:00, 173.52it/s]
100%|██████████| 1/1 [00:00<00:00, 150.16it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 256.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 635.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 786.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.823789656162262, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.418, 'eval_steps_per_second': 12.418, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.55s/it]

{'eval_loss': 0.7375302910804749, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.098, 'eval_steps_per_second': 13.098, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.56s/it]

{'eval_loss': 0.706311821937561, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.306, 'eval_steps_per_second': 12.306, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.92s/it]


{'train_runtime': 46.1078, 'train_samples_per_second': 3.709, 'train_steps_per_second': 0.521, 'train_loss': 0.7059560616811117, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.03it/s]
100%|██████████| 1/1 [00:00<00:00, 150.11it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 250.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 491.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 125.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-strea

{'eval_loss': 0.7318994402885437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 11.004, 'eval_steps_per_second': 11.004, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.58s/it]

{'eval_loss': 0.6041494607925415, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.88, 'eval_steps_per_second': 10.88, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.55s/it]

{'eval_loss': 0.5876171588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.264, 'eval_steps_per_second': 12.264, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.4117, 'train_samples_per_second': 3.684, 'train_steps_per_second': 0.517, 'train_loss': 0.7869168917338053, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 149.37it/s]
100%|██████████| 1/1 [00:00<00:00, 115.19it/s]
100%|██████████| 1/1 [00:00<00:00, 73.44it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 248.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream

{'eval_loss': 0.7318994402885437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.27, 'eval_steps_per_second': 12.27, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.55s/it]

{'eval_loss': 0.6041494607925415, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.548, 'eval_steps_per_second': 11.548, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.55s/it]

{'eval_loss': 0.5876171588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.405, 'eval_steps_per_second': 12.405, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.92s/it]


{'train_runtime': 46.097, 'train_samples_per_second': 3.71, 'train_steps_per_second': 0.521, 'train_loss': 0.7869168917338053, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.62it/s]
100%|██████████| 1/1 [00:00<00:00, 147.11it/s]
100%|██████████| 1/1 [00:00<00:00, 175.01it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 25447.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 511.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 247.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 114.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 145.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7318994402885437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.57s/it]

{'eval_loss': 0.6041494607925415, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.154, 'eval_steps_per_second': 12.154, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.57s/it]

{'eval_loss': 0.5876171588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.128, 'eval_steps_per_second': 12.128, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.4193, 'train_samples_per_second': 3.684, 'train_steps_per_second': 0.517, 'train_loss': 0.7869168917338053, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.51it/s]
100%|██████████| 1/1 [00:00<00:00, 193.30it/s]
100%|██████████| 1/1 [00:00<00:00, 322.96it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 974.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 985.74 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 270.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7318994402885437, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.104, 'eval_steps_per_second': 12.104, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.55s/it]

{'eval_loss': 0.6041494607925415, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0744, 'eval_samples_per_second': 13.447, 'eval_steps_per_second': 13.447, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.54s/it]

{'eval_loss': 0.5876171588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.92s/it]


{'train_runtime': 46.0207, 'train_samples_per_second': 3.716, 'train_steps_per_second': 0.522, 'train_loss': 0.7869168917338053, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.12it/s]
100%|██████████| 1/1 [00:00<00:00, 141.99it/s]
100%|██████████| 1/1 [00:00<00:00, 169.65it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 915.39 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 272.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 363.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 145.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.732743501663208, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.011, 'eval_steps_per_second': 12.011, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.62s/it]

{'eval_loss': 0.6028785109519958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.096, 'eval_steps_per_second': 13.096, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.5860692858695984, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.302, 'eval_steps_per_second': 12.302, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.9156, 'train_samples_per_second': 3.709, 'train_steps_per_second': 0.512, 'train_loss': 0.7844263712565104, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 111.54it/s]
100%|██████████| 1/1 [00:00<00:00, 167.57it/s]
100%|██████████| 1/1 [00:00<00:00, 94.99it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 531.19 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 267.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.732743501663208, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0743, 'eval_samples_per_second': 13.456, 'eval_steps_per_second': 13.456, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.61s/it]

{'eval_loss': 0.6028785109519958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.122, 'eval_steps_per_second': 12.122, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.60s/it]

{'eval_loss': 0.5860692858695984, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0795, 'eval_samples_per_second': 12.578, 'eval_steps_per_second': 12.578, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.4947, 'train_samples_per_second': 3.742, 'train_steps_per_second': 0.516, 'train_loss': 0.7844263712565104, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 2805.55it/s]
100%|██████████| 1/1 [00:00<00:00, 113.09it/s]
100%|██████████| 1/1 [00:00<00:00, 78.62it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 27550.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 3013.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 247.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 113.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.732743501663208, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0766, 'eval_samples_per_second': 13.061, 'eval_steps_per_second': 13.061, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.64s/it]

{'eval_loss': 0.6028785109519958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.965, 'eval_steps_per_second': 11.965, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.64s/it]

{'eval_loss': 0.5860692858695984, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.137, 'eval_steps_per_second': 12.137, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.97s/it]


{'train_runtime': 47.1683, 'train_samples_per_second': 3.689, 'train_steps_per_second': 0.509, 'train_loss': 0.7844263712565104, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.31it/s]
100%|██████████| 1/1 [00:00<00:00, 169.64it/s]
100%|██████████| 1/1 [00:00<00:00, 143.47it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 5053.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.82 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 259.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 985.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.732743501663208, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.342, 'eval_steps_per_second': 11.342, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.60s/it]

{'eval_loss': 0.6028785109519958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.012, 'eval_steps_per_second': 12.012, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.61s/it]

{'eval_loss': 0.5860692858695984, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.497, 'eval_steps_per_second': 11.497, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.6042, 'train_samples_per_second': 3.734, 'train_steps_per_second': 0.515, 'train_loss': 0.7844263712565104, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.08it/s]
100%|██████████| 1/1 [00:00<00:00, 94.42it/s]
100%|██████████| 1/1 [00:00<00:00, 179.43it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 22234.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 969.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 245.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.732743501663208, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.091, 'eval_steps_per_second': 13.091, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.61s/it]

{'eval_loss': 0.6028785109519958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.476, 'eval_steps_per_second': 11.476, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.63s/it]

{'eval_loss': 0.5860692858695984, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.7777, 'train_samples_per_second': 3.72, 'train_steps_per_second': 0.513, 'train_loss': 0.7844263712565104, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 87.13it/s]
100%|██████████| 1/1 [00:00<00:00, 103.50it/s]
100%|██████████| 1/1 [00:00<00:00, 140.81it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 8442.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 233.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7328381538391113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0903, 'eval_samples_per_second': 11.068, 'eval_steps_per_second': 11.068, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:13,  1.65s/it]

{'eval_loss': 0.6028496026992798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0904, 'eval_samples_per_second': 11.057, 'eval_steps_per_second': 11.057, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.5843009352684021, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0945, 'eval_samples_per_second': 10.58, 'eval_steps_per_second': 10.58, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.8351, 'train_samples_per_second': 3.779, 'train_steps_per_second': 0.512, 'train_loss': 0.7830787499745687, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 130.26it/s]
100%|██████████| 1/1 [00:00<00:00, 168.53it/s]
100%|██████████| 1/1 [00:00<00:00, 176.68it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 249.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 147.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7328381538391113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1292, 'eval_samples_per_second': 7.741, 'eval_steps_per_second': 7.741, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.68s/it]

{'eval_loss': 0.6028496026992798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1288, 'eval_samples_per_second': 7.765, 'eval_steps_per_second': 7.765, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.66s/it]

{'eval_loss': 0.5843009352684021, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1021, 'eval_samples_per_second': 9.795, 'eval_steps_per_second': 9.795, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.98s/it]


{'train_runtime': 47.4424, 'train_samples_per_second': 3.731, 'train_steps_per_second': 0.506, 'train_loss': 0.7830787499745687, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 115.90it/s]
100%|██████████| 1/1 [00:00<00:00, 131.62it/s]
100%|██████████| 1/1 [00:00<00:00, 85.39it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 246.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 113.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-strea

{'eval_loss': 0.7328381538391113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1002, 'eval_samples_per_second': 9.976, 'eval_steps_per_second': 9.976, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.65s/it]

{'eval_loss': 0.6028496026992798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.965, 'eval_steps_per_second': 11.965, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.66s/it]

{'eval_loss': 0.5843009352684021, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1292, 'eval_samples_per_second': 7.74, 'eval_steps_per_second': 7.74, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.9075, 'train_samples_per_second': 3.773, 'train_steps_per_second': 0.512, 'train_loss': 0.7830787499745687, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 365.10it/s]
100%|██████████| 1/1 [00:00<00:00, 110.48it/s]
100%|██████████| 1/1 [00:00<00:00, 3037.15it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 235.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stre

{'eval_loss': 0.7328381538391113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1356, 'eval_samples_per_second': 7.372, 'eval_steps_per_second': 7.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.65s/it]

{'eval_loss': 0.6028496026992798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1322, 'eval_samples_per_second': 7.565, 'eval_steps_per_second': 7.565, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.72s/it]

{'eval_loss': 0.5843009352684021, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0935, 'eval_samples_per_second': 10.699, 'eval_steps_per_second': 10.699, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.98s/it]


{'train_runtime': 47.5299, 'train_samples_per_second': 3.724, 'train_steps_per_second': 0.505, 'train_loss': 0.7830787499745687, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 161.47it/s]
100%|██████████| 1/1 [00:00<00:00, 140.12it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 29502.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 687.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 969.11 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.7328381538391113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1299, 'eval_samples_per_second': 7.696, 'eval_steps_per_second': 7.696, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.65s/it]

{'eval_loss': 0.6028496026992798, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1385, 'eval_samples_per_second': 7.22, 'eval_steps_per_second': 7.22, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.63s/it]

{'eval_loss': 0.5843009352684021, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1312, 'eval_samples_per_second': 7.624, 'eval_steps_per_second': 7.624, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.8649, 'train_samples_per_second': 3.777, 'train_steps_per_second': 0.512, 'train_loss': 0.7830787499745687, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 1618.17it/s]
100%|██████████| 1/1 [00:00<00:00, 165.44it/s]
100%|██████████| 1/1 [00:00<00:00, 103.30it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 30002.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.91 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 247.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 141.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 147.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 147.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.7315797209739685, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1237, 'eval_samples_per_second': 8.082, 'eval_steps_per_second': 8.082, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.75s/it]

{'eval_loss': 0.6035921573638916, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.19, 'eval_steps_per_second': 12.19, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.74s/it]

{'eval_loss': 0.5845718383789062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.995, 'eval_steps_per_second': 11.995, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.128, 'train_samples_per_second': 3.74, 'train_steps_per_second': 0.499, 'train_loss': 0.7813993295033773, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.82it/s]
100%|██████████| 1/1 [00:00<00:00, 165.56it/s]
100%|██████████| 1/1 [00:00<00:00, 145.11it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 8716.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 851.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 246.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.7315797209739685, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.139, 'eval_steps_per_second': 12.139, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.69s/it]

{'eval_loss': 0.6035921573638916, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.01, 'eval_steps_per_second': 12.01, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.69s/it]

{'eval_loss': 0.5845718383789062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.618, 'eval_steps_per_second': 11.618, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.96s/it]


{'train_runtime': 47.0258, 'train_samples_per_second': 3.828, 'train_steps_per_second': 0.51, 'train_loss': 0.7813993295033773, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.54it/s]
100%|██████████| 1/1 [00:00<00:00, 166.26it/s]
100%|██████████| 1/1 [00:00<00:00, 364.53it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 29991.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 987.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1252.03 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 250.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 147.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 343.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 113.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7315797209739685, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0795, 'eval_samples_per_second': 12.571, 'eval_steps_per_second': 12.571, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.70s/it]

{'eval_loss': 0.6035921573638916, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.885, 'eval_steps_per_second': 11.885, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.69s/it]

{'eval_loss': 0.5845718383789062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.421, 'eval_steps_per_second': 12.421, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.96s/it]


{'train_runtime': 47.0069, 'train_samples_per_second': 3.829, 'train_steps_per_second': 0.511, 'train_loss': 0.7813993295033773, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.09it/s]
100%|██████████| 1/1 [00:00<00:00, 238.53it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 29433.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 233.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 348.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on 

{'eval_loss': 0.7315797209739685, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.609, 'eval_steps_per_second': 11.609, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.70s/it]

{'eval_loss': 0.6035921573638916, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.006, 'eval_steps_per_second': 12.006, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.70s/it]

{'eval_loss': 0.5845718383789062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.451, 'eval_steps_per_second': 12.451, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.97s/it]


{'train_runtime': 47.2528, 'train_samples_per_second': 3.809, 'train_steps_per_second': 0.508, 'train_loss': 0.7813993295033773, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 199.50it/s]
100%|██████████| 1/1 [00:00<00:00, 2882.68it/s]
100%|██████████| 1/1 [00:00<00:00, 148.88it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 55750.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 221.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 459.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 140.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 405.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Y

{'eval_loss': 0.7315797209739685, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.556, 'eval_steps_per_second': 11.556, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:13,  1.69s/it]

{'eval_loss': 0.6035921573638916, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0893, 'eval_samples_per_second': 11.192, 'eval_steps_per_second': 11.192, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.69s/it]

{'eval_loss': 0.5845718383789062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.442, 'eval_steps_per_second': 12.442, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.8195, 'train_samples_per_second': 3.845, 'train_steps_per_second': 0.513, 'train_loss': 0.7813993295033773, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 192.93it/s]
100%|██████████| 1/1 [00:00<00:00, 170.00it/s]
100%|██████████| 1/1 [00:00<00:00, 147.18it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 30502.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 227.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 4788.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 490.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 267.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this 

{'eval_loss': 0.7322956919670105, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.997, 'eval_steps_per_second': 10.997, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.77s/it]

{'eval_loss': 0.6015439033508301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.726, 'eval_steps_per_second': 12.726, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.76s/it]

{'eval_loss': 0.5817645192146301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0736, 'eval_samples_per_second': 13.596, 'eval_steps_per_second': 13.596, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.00s/it]


{'train_runtime': 48.0853, 'train_samples_per_second': 3.806, 'train_steps_per_second': 0.499, 'train_loss': 0.7807329495747884, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 170.01it/s]
100%|██████████| 1/1 [00:00<00:00, 171.53it/s]
100%|██████████| 1/1 [00:00<00:00, 329.27it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 8714.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 147.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 241.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 842.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7322956919670105, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0908, 'eval_samples_per_second': 11.008, 'eval_steps_per_second': 11.008, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.77s/it]

{'eval_loss': 0.6015439033508301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.449, 'eval_steps_per_second': 12.449, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.76s/it]

{'eval_loss': 0.5817645192146301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.151, 'eval_steps_per_second': 12.151, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.00s/it]


{'train_runtime': 48.02, 'train_samples_per_second': 3.811, 'train_steps_per_second': 0.5, 'train_loss': 0.7807329495747884, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 806.13it/s]
100%|██████████| 1/1 [00:00<00:00, 93.01it/s]
100%|██████████| 1/1 [00:00<00:00, 84.14it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 27650.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 229.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7322956919670105, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.898, 'eval_steps_per_second': 12.898, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.76s/it]

{'eval_loss': 0.6015439033508301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.112, 'eval_steps_per_second': 13.112, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.75s/it]

{'eval_loss': 0.5817645192146301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.567, 'eval_steps_per_second': 12.567, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  2.00s/it]


{'train_runtime': 47.884, 'train_samples_per_second': 3.822, 'train_steps_per_second': 0.501, 'train_loss': 0.7807329495747884, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.30it/s]
100%|██████████| 1/1 [00:00<00:00, 337.98it/s]
100%|██████████| 1/1 [00:00<00:00, 194.21it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 30502.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 493.04 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 219.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 149.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1010.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.7322956919670105, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.099, 'eval_steps_per_second': 13.099, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.76s/it]

{'eval_loss': 0.6015439033508301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.868, 'eval_steps_per_second': 11.868, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.77s/it]

{'eval_loss': 0.5817645192146301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.436, 'eval_steps_per_second': 11.436, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.1398, 'train_samples_per_second': 3.801, 'train_steps_per_second': 0.499, 'train_loss': 0.7807329495747884, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 150.38it/s]
100%|██████████| 1/1 [00:00<00:00, 241.00it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 29445.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 694.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 234.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7322956919670105, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.175, 'eval_steps_per_second': 12.175, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.76s/it]

{'eval_loss': 0.6015439033508301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.077, 'eval_steps_per_second': 13.077, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.76s/it]

{'eval_loss': 0.5817645192146301, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.995, 'eval_steps_per_second': 11.995, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  2.00s/it]


{'train_runtime': 47.9413, 'train_samples_per_second': 3.817, 'train_steps_per_second': 0.501, 'train_loss': 0.7807329495747884, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 146.68it/s]
100%|██████████| 1/1 [00:00<00:00, 139.14it/s]
100%|██████████| 1/1 [00:00<00:00, 170.12it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 30994.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 987.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 223.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 66.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 539.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 148.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7326089143753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.362, 'eval_steps_per_second': 11.362, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.83s/it]

{'eval_loss': 0.6020734310150146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.071, 'eval_steps_per_second': 13.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.80s/it]

{'eval_loss': 0.5817705392837524, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.087, 'eval_steps_per_second': 13.087, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.3842, 'train_samples_per_second': 3.844, 'train_steps_per_second': 0.496, 'train_loss': 0.7802079518636068, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 154.93it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 93.78it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 514.13 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 229.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 306.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 128.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 151.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mode

{'eval_loss': 0.7326089143753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0735, 'eval_samples_per_second': 13.611, 'eval_steps_per_second': 13.611, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.80s/it]

{'eval_loss': 0.6020734310150146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.27, 'eval_steps_per_second': 12.27, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.80s/it]

{'eval_loss': 0.5817705392837524, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.037, 'eval_steps_per_second': 13.037, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.3257, 'train_samples_per_second': 3.849, 'train_steps_per_second': 0.497, 'train_loss': 0.7802079518636068, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.95it/s]
100%|██████████| 1/1 [00:00<00:00, 126.20it/s]
100%|██████████| 1/1 [00:00<00:00, 124.95it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 233.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 146.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7326089143753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0745, 'eval_samples_per_second': 13.423, 'eval_steps_per_second': 13.423, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.81s/it]

{'eval_loss': 0.6020734310150146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.075, 'eval_steps_per_second': 13.075, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.81s/it]

{'eval_loss': 0.5817705392837524, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.62, 'eval_steps_per_second': 11.62, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.4776, 'train_samples_per_second': 3.837, 'train_steps_per_second': 0.495, 'train_loss': 0.7802079518636068, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 176.73it/s]
100%|██████████| 1/1 [00:00<00:00, 90.04it/s]
100%|██████████| 1/1 [00:00<00:00, 1359.14it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 228.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 560.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 335.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.7326089143753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0792, 'eval_samples_per_second': 12.624, 'eval_steps_per_second': 12.624, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.82s/it]

{'eval_loss': 0.6020734310150146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.095, 'eval_steps_per_second': 13.095, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.82s/it]

{'eval_loss': 0.5817705392837524, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0733, 'eval_samples_per_second': 13.645, 'eval_steps_per_second': 13.645, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.5971, 'train_samples_per_second': 3.827, 'train_steps_per_second': 0.494, 'train_loss': 0.7802079518636068, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 151.08it/s]
100%|██████████| 1/1 [00:00<00:00, 199.01it/s]
100%|██████████| 1/1 [00:00<00:00, 104.12it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 33702.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 978.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 213.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 240.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7326089143753052, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.851, 'eval_steps_per_second': 11.851, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.81s/it]

{'eval_loss': 0.6020734310150146, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.156, 'eval_steps_per_second': 12.156, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.81s/it]

{'eval_loss': 0.5817705392837524, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.261, 'eval_steps_per_second': 12.261, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.5744, 'train_samples_per_second': 3.829, 'train_steps_per_second': 0.494, 'train_loss': 0.7802079518636068, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.38it/s]
100%|██████████| 1/1 [00:00<00:00, 177.87it/s]
100%|██████████| 1/1 [00:00<00:00, 146.91it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 26731.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 223.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 121.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 309.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 252.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7320257425308228, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.263, 'eval_steps_per_second': 12.263, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.85s/it]

{'eval_loss': 0.6023174524307251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.01, 'eval_steps_per_second': 12.01, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.86s/it]

{'eval_loss': 0.5812777280807495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0802, 'eval_samples_per_second': 12.476, 'eval_steps_per_second': 12.476, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.628, 'train_samples_per_second': 3.887, 'train_steps_per_second': 0.494, 'train_loss': 0.7797067165374756, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 145.44it/s]
100%|██████████| 1/1 [00:00<00:00, 133.10it/s]
100%|██████████| 1/1 [00:00<00:00, 215.35it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 31498.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 213.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 150.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 143.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.7320257425308228, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.253, 'eval_steps_per_second': 12.253, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.83s/it]

{'eval_loss': 0.6023174524307251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.158, 'eval_steps_per_second': 12.158, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.84s/it]

{'eval_loss': 0.5812777280807495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.846, 'eval_steps_per_second': 11.846, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.4167, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.496, 'train_loss': 0.7797067165374756, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 58.90it/s]
100%|██████████| 1/1 [00:00<00:00, 919.40it/s]
100%|██████████| 1/1 [00:00<00:00, 276.92it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 484.39 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 222.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 140.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN 

{'eval_loss': 0.7320257425308228, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.592, 'eval_steps_per_second': 11.592, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.84s/it]

{'eval_loss': 0.6023174524307251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.274, 'eval_steps_per_second': 13.274, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.85s/it]

{'eval_loss': 0.5812777280807495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.989, 'eval_steps_per_second': 11.989, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.5992, 'train_samples_per_second': 3.889, 'train_steps_per_second': 0.494, 'train_loss': 0.7797067165374756, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 92.58it/s]
100%|██████████| 1/1 [00:00<00:00, 90.38it/s]
100%|██████████| 1/1 [00:00<00:00, 168.55it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 220.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 892.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 112.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should proba

{'eval_loss': 0.7320257425308228, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0771, 'eval_samples_per_second': 12.977, 'eval_steps_per_second': 12.977, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.83s/it]

{'eval_loss': 0.6023174524307251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.858, 'eval_steps_per_second': 11.858, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.84s/it]

{'eval_loss': 0.5812777280807495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0715, 'eval_samples_per_second': 13.977, 'eval_steps_per_second': 13.977, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.4136, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.496, 'train_loss': 0.7797067165374756, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 127.23it/s]
100%|██████████| 1/1 [00:00<00:00, 125.08it/s]
100%|██████████| 1/1 [00:00<00:00, 137.86it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 144.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 220.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.7320257425308228, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.991, 'eval_steps_per_second': 11.991, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.88s/it]

{'eval_loss': 0.6023174524307251, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.095, 'eval_steps_per_second': 12.095, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.85s/it]

{'eval_loss': 0.5812777280807495, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0742, 'eval_samples_per_second': 13.473, 'eval_steps_per_second': 13.473, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.04s/it]


{'train_runtime': 49.0017, 'train_samples_per_second': 3.857, 'train_steps_per_second': 0.49, 'train_loss': 0.7797067165374756, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 111.19it/s]
100%|██████████| 1/1 [00:00<00:00, 166.10it/s]
100%|██████████| 1/1 [00:00<00:00, 321.30it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 216.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7319764494895935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.018, 'eval_steps_per_second': 12.018, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.90s/it]

{'eval_loss': 0.602016031742096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0784, 'eval_samples_per_second': 12.751, 'eval_steps_per_second': 12.751, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.91s/it]

{'eval_loss': 0.5804283022880554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.104, 'eval_steps_per_second': 12.104, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.04s/it]


{'train_runtime': 49.0278, 'train_samples_per_second': 3.916, 'train_steps_per_second': 0.49, 'train_loss': 0.7791812419891357, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 310.28it/s]
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
100%|██████████| 1/1 [00:00<00:00, 156.87it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 29796.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 236.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.7319764494895935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.178, 'eval_steps_per_second': 11.178, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.92s/it]

{'eval_loss': 0.602016031742096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.071, 'eval_steps_per_second': 13.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.92s/it]

{'eval_loss': 0.5804283022880554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.199, 'eval_steps_per_second': 12.199, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.06s/it]


{'train_runtime': 49.5575, 'train_samples_per_second': 3.874, 'train_steps_per_second': 0.484, 'train_loss': 0.7791812419891357, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 578.52it/s]
100%|██████████| 1/1 [00:00<00:00, 102.71it/s]
100%|██████████| 1/1 [00:00<00:00, 311.22it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 27065.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 977.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1220.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 217.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 368.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7319764494895935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.89s/it]

{'eval_loss': 0.602016031742096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.997, 'eval_steps_per_second': 11.997, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.88s/it]

{'eval_loss': 0.5804283022880554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.898, 'eval_steps_per_second': 12.898, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.7134, 'train_samples_per_second': 3.941, 'train_steps_per_second': 0.493, 'train_loss': 0.7791812419891357, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 129.37it/s]
100%|██████████| 1/1 [00:00<00:00, 339.29it/s]
100%|██████████| 1/1 [00:00<00:00, 90.56it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 145.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 891.46 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 214.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 349.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.7319764494895935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.15, 'eval_steps_per_second': 12.15, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.88s/it]

{'eval_loss': 0.602016031742096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.018, 'eval_steps_per_second': 12.018, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.92s/it]

{'eval_loss': 0.5804283022880554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.523, 'eval_steps_per_second': 11.523, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.05s/it]


{'train_runtime': 49.2656, 'train_samples_per_second': 3.897, 'train_steps_per_second': 0.487, 'train_loss': 0.7791812419891357, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.43it/s]
100%|██████████| 1/1 [00:00<00:00, 147.92it/s]
100%|██████████| 1/1 [00:00<00:00, 168.30it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 31655.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 148.72 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 223.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 337.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 158.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 114.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7319764494895935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.942, 'eval_steps_per_second': 12.942, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:15,  1.88s/it]

{'eval_loss': 0.602016031742096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.319, 'eval_steps_per_second': 12.319, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.88s/it]

{'eval_loss': 0.5804283022880554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.267, 'eval_steps_per_second': 12.267, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.4681, 'train_samples_per_second': 3.961, 'train_steps_per_second': 0.495, 'train_loss': 0.7791812419891357, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.60it/s]
100%|██████████| 1/1 [00:00<00:00, 145.85it/s]
100%|██████████| 1/1 [00:00<00:00, 143.98it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 32498.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 217.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 113.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 115.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 949.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7262880802154541, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.031, 'eval_steps_per_second': 12.031, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.53s/it]

{'eval_loss': 0.574077308177948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.278, 'eval_steps_per_second': 12.278, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.52s/it]

{'eval_loss': 0.5185704827308655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.862, 'eval_steps_per_second': 11.862, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.89s/it]


{'train_runtime': 51.0347, 'train_samples_per_second': 3.821, 'train_steps_per_second': 0.529, 'train_loss': 0.6954932742648654, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.99it/s]
100%|██████████| 1/1 [00:00<00:00, 452.80it/s]
100%|██████████| 1/1 [00:00<00:00, 145.09it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 63431.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 216.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 411.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7803758978843689, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.929, 'eval_steps_per_second': 12.929, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.52s/it]

{'eval_loss': 0.7100860476493835, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.622, 'eval_steps_per_second': 10.622, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.52s/it]

{'eval_loss': 0.6654278635978699, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.979, 'eval_steps_per_second': 11.979, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.89s/it]


{'train_runtime': 51.0418, 'train_samples_per_second': 3.82, 'train_steps_per_second': 0.529, 'train_loss': 0.622164973506221, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 125.24it/s]
100%|██████████| 1/1 [00:00<00:00, 165.95it/s]
100%|██████████| 1/1 [00:00<00:00, 130.80it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 140.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.65 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 226.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mod

{'eval_loss': 0.7803758978843689, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.084, 'eval_steps_per_second': 13.084, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.52s/it]

{'eval_loss': 0.7100860476493835, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.003, 'eval_steps_per_second': 12.003, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.52s/it]

{'eval_loss': 0.6654278635978699, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.003, 'eval_steps_per_second': 12.003, 'epoch': 3.0}


100%|██████████| 27/27 [00:50<00:00,  1.89s/it]


{'train_runtime': 50.9356, 'train_samples_per_second': 3.828, 'train_steps_per_second': 0.53, 'train_loss': 0.622164973506221, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 145.57it/s]
100%|██████████| 1/1 [00:00<00:00, 147.65it/s]
100%|██████████| 1/1 [00:00<00:00, 195.86it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 64973.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 226.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 3111.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this

{'eval_loss': 0.7803758978843689, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0953, 'eval_samples_per_second': 10.498, 'eval_steps_per_second': 10.498, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.51s/it]

{'eval_loss': 0.7100860476493835, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0903, 'eval_samples_per_second': 11.074, 'eval_steps_per_second': 11.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.55s/it]

{'eval_loss': 0.6654277443885803, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.807, 'eval_steps_per_second': 12.807, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


{'train_runtime': 51.2927, 'train_samples_per_second': 3.802, 'train_steps_per_second': 0.526, 'train_loss': 0.622164973506221, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.88it/s]
100%|██████████| 1/1 [00:00<00:00, 168.81it/s]
100%|██████████| 1/1 [00:00<00:00, 148.67it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 32263.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 533.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 222.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 127.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 357.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 251.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7803758978843689, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.033, 'eval_steps_per_second': 12.033, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.52s/it]

{'eval_loss': 0.7100860476493835, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0792, 'eval_samples_per_second': 12.62, 'eval_steps_per_second': 12.62, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.51s/it]

{'eval_loss': 0.6654278635978699, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.266, 'eval_steps_per_second': 12.266, 'epoch': 3.0}


100%|██████████| 27/27 [00:50<00:00,  1.89s/it]


{'train_runtime': 50.9605, 'train_samples_per_second': 3.826, 'train_steps_per_second': 0.53, 'train_loss': 0.622164973506221, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 127.67it/s]
100%|██████████| 1/1 [00:00<00:00, 147.99it/s]
100%|██████████| 1/1 [00:00<00:00, 92.66it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 9678.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 145.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 216.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 128.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 255.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.7760001420974731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.248, 'eval_steps_per_second': 12.248, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.7050943374633789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.308, 'eval_steps_per_second': 12.308, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.59s/it]

{'eval_loss': 0.6561811566352844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.151, 'eval_steps_per_second': 12.151, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.93s/it]


{'train_runtime': 52.1236, 'train_samples_per_second': 3.799, 'train_steps_per_second': 0.518, 'train_loss': 0.6201725712528935, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.90it/s]
100%|██████████| 1/1 [00:00<00:00, 85.57it/s]
100%|██████████| 1/1 [00:00<00:00, 111.19it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 978.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 220.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 724.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mod

{'eval_loss': 0.7760001420974731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.928, 'eval_steps_per_second': 12.928, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.58s/it]

{'eval_loss': 0.7050943374633789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0794, 'eval_samples_per_second': 12.592, 'eval_steps_per_second': 12.592, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.57s/it]

{'eval_loss': 0.6561811566352844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.012, 'eval_steps_per_second': 12.012, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


{'train_runtime': 51.3635, 'train_samples_per_second': 3.855, 'train_steps_per_second': 0.526, 'train_loss': 0.6201725712528935, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 100.38it/s]
100%|██████████| 1/1 [00:00<00:00, 326.28it/s]
100%|██████████| 1/1 [00:00<00:00, 215.87it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 9493.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 494.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 221.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 479.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7760001420974731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.079, 'eval_steps_per_second': 13.079, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.59s/it]

{'eval_loss': 0.7050943374633789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0697, 'eval_samples_per_second': 14.342, 'eval_steps_per_second': 14.342, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.6561811566352844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.11, 'eval_steps_per_second': 12.11, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.93s/it]


{'train_runtime': 52.143, 'train_samples_per_second': 3.797, 'train_steps_per_second': 0.518, 'train_loss': 0.6201725712528935, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 215.56it/s]
100%|██████████| 1/1 [00:00<00:00, 213.45it/s]
100%|██████████| 1/1 [00:00<00:00, 1504.95it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 33022.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 221.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 336.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 144.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably T

{'eval_loss': 0.7760001420974731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.486, 'eval_steps_per_second': 11.486, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.57s/it]

{'eval_loss': 0.7050943374633789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.069, 'eval_steps_per_second': 13.069, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.57s/it]

{'eval_loss': 0.6561811566352844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.038, 'eval_steps_per_second': 11.038, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


{'train_runtime': 51.3443, 'train_samples_per_second': 3.856, 'train_steps_per_second': 0.526, 'train_loss': 0.6201725712528935, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 149.60it/s]
100%|██████████| 1/1 [00:00<00:00, 3258.98it/s]
100%|██████████| 1/1 [00:00<00:00, 347.70it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 26379.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 987.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 218.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 127.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 544.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 350.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7760001420974731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.892, 'eval_steps_per_second': 11.892, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.56s/it]

{'eval_loss': 0.7050943374633789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.58s/it]

{'eval_loss': 0.6561811566352844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.874, 'eval_steps_per_second': 10.874, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.5188, 'train_samples_per_second': 3.843, 'train_steps_per_second': 0.524, 'train_loss': 0.6201725712528935, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.44it/s]
100%|██████████| 1/1 [00:00<00:00, 3077.26it/s]
100%|██████████| 1/1 [00:00<00:00, 193.80it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 805.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 261.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 223.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.7749893665313721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1119, 'eval_samples_per_second': 8.939, 'eval_steps_per_second': 8.939, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.7051089406013489, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1301, 'eval_samples_per_second': 7.687, 'eval_steps_per_second': 7.687, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.63s/it]

{'eval_loss': 0.655706524848938, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1092, 'eval_samples_per_second': 9.161, 'eval_steps_per_second': 9.161, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


{'train_runtime': 52.2849, 'train_samples_per_second': 3.844, 'train_steps_per_second': 0.516, 'train_loss': 0.6193314305058232, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 324.59it/s]
100%|██████████| 1/1 [00:00<00:00, 373.92it/s]
100%|██████████| 1/1 [00:00<00:00, 180.59it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 33494.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 209.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 145.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 347.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a

{'eval_loss': 0.7749893665313721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1312, 'eval_samples_per_second': 7.624, 'eval_steps_per_second': 7.624, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.61s/it]

{'eval_loss': 0.7051089406013489, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1209, 'eval_samples_per_second': 8.27, 'eval_steps_per_second': 8.27, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.655706524848938, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.699, 'eval_steps_per_second': 11.699, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.5609, 'train_samples_per_second': 3.898, 'train_steps_per_second': 0.524, 'train_loss': 0.6193314305058232, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 156.38it/s]
100%|██████████| 1/1 [00:00<00:00, 1542.02it/s]
100%|██████████| 1/1 [00:00<00:00, 376.00it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 9452.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 213.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 577.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.7749893665313721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1078, 'eval_samples_per_second': 9.279, 'eval_steps_per_second': 9.279, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.7051089406013489, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1218, 'eval_samples_per_second': 8.213, 'eval_steps_per_second': 8.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.59s/it]

{'eval_loss': 0.655706524848938, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0939, 'eval_samples_per_second': 10.655, 'eval_steps_per_second': 10.655, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.6011, 'train_samples_per_second': 3.895, 'train_steps_per_second': 0.523, 'train_loss': 0.6193314305058232, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 171.00it/s]
100%|██████████| 1/1 [00:00<00:00, 166.20it/s]
100%|██████████| 1/1 [00:00<00:00, 243.88it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 66877.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 210.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 147.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 339.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this 

{'eval_loss': 0.7749893665313721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1327, 'eval_samples_per_second': 7.535, 'eval_steps_per_second': 7.535, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.60s/it]

{'eval_loss': 0.7051089406013489, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1043, 'eval_samples_per_second': 9.59, 'eval_steps_per_second': 9.59, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.655706524848938, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.123, 'eval_samples_per_second': 8.128, 'eval_steps_per_second': 8.128, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.4842, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.524, 'train_loss': 0.6193314305058232, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 237.73it/s]
100%|██████████| 1/1 [00:00<00:00, 144.11it/s]
100%|██████████| 1/1 [00:00<00:00, 151.38it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 9577.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 907.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 213.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1394.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 347.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 112.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7749893665313721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1011, 'eval_samples_per_second': 9.889, 'eval_steps_per_second': 9.889, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.7051089406013489, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1162, 'eval_samples_per_second': 8.604, 'eval_steps_per_second': 8.604, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.655706524848938, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1176, 'eval_samples_per_second': 8.507, 'eval_steps_per_second': 8.507, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.5756, 'train_samples_per_second': 3.897, 'train_steps_per_second': 0.524, 'train_loss': 0.6193314305058232, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.93it/s]
100%|██████████| 1/1 [00:00<00:00, 587.60it/s]
100%|██████████| 1/1 [00:00<00:00, 149.33it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 23561.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 141.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 218.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 941.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.7746464014053345, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0762, 'eval_samples_per_second': 13.122, 'eval_steps_per_second': 13.122, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.68s/it]

{'eval_loss': 0.7041215300559998, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.157, 'eval_steps_per_second': 12.157, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.66s/it]

{'eval_loss': 0.6548824906349182, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.304, 'eval_steps_per_second': 12.304, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


{'train_runtime': 52.3662, 'train_samples_per_second': 3.896, 'train_steps_per_second': 0.516, 'train_loss': 0.6188572071216725, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.19it/s]
100%|██████████| 1/1 [00:00<00:00, 375.70it/s]
100%|██████████| 1/1 [00:00<00:00, 169.68it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 33990.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 143.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 212.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7746464014053345, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.99, 'eval_steps_per_second': 11.99, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.68s/it]

{'eval_loss': 0.7041215300559998, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.112, 'eval_steps_per_second': 12.112, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.71s/it]

{'eval_loss': 0.6548824906349182, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0969, 'eval_samples_per_second': 10.322, 'eval_steps_per_second': 10.322, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.6501, 'train_samples_per_second': 3.875, 'train_steps_per_second': 0.513, 'train_loss': 0.6188572071216725, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 324.34it/s]
100%|██████████| 1/1 [00:00<00:00, 125.54it/s]
100%|██████████| 1/1 [00:00<00:00, 162.12it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 34022.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 545.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 144.80 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 215.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7746464014053345, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.741, 'eval_steps_per_second': 11.741, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.67s/it]

{'eval_loss': 0.7041215300559998, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.897, 'eval_steps_per_second': 11.897, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.67s/it]

{'eval_loss': 0.6548824906349182, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.168, 'eval_steps_per_second': 12.168, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.527, 'train_samples_per_second': 3.884, 'train_steps_per_second': 0.514, 'train_loss': 0.6188572071216725, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 245.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.03it/s]
100%|██████████| 1/1 [00:00<00:00, 152.49it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 76835.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 974.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 206.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7746464014053345, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 12.999, 'eval_steps_per_second': 12.999, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.70s/it]

{'eval_loss': 0.7041215300559998, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0872, 'eval_samples_per_second': 11.465, 'eval_steps_per_second': 11.465, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.73s/it]

{'eval_loss': 0.6548824906349182, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.121, 'eval_steps_per_second': 12.121, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.4514, 'train_samples_per_second': 3.817, 'train_steps_per_second': 0.505, 'train_loss': 0.6188572071216725, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 99.85it/s]
100%|██████████| 1/1 [00:00<00:00, 168.13it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 34002.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 217.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 142.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 359.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.7746464014053345, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.886, 'eval_steps_per_second': 11.886, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.69s/it]

{'eval_loss': 0.7041215300559998, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.512, 'eval_steps_per_second': 12.512, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.67s/it]

{'eval_loss': 0.6548824906349182, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.076, 'eval_steps_per_second': 12.076, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.5684, 'train_samples_per_second': 3.881, 'train_steps_per_second': 0.514, 'train_loss': 0.6188572071216725, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 84.78it/s]
100%|██████████| 1/1 [00:00<00:00, 90.06it/s]
100%|██████████| 1/1 [00:00<00:00, 127.22it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 144.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 886.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 257.62 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 210.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 3093.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN 

{'eval_loss': 0.7747724652290344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.898, 'eval_steps_per_second': 11.898, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.74s/it]

{'eval_loss': 0.7052212953567505, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0733, 'eval_samples_per_second': 13.646, 'eval_steps_per_second': 13.646, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.73s/it]

{'eval_loss': 0.6554700136184692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.414, 'eval_steps_per_second': 12.414, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.4128, 'train_samples_per_second': 3.875, 'train_steps_per_second': 0.505, 'train_loss': 0.6184890181929977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 162.12it/s]
100%|██████████| 1/1 [00:00<00:00, 153.89it/s]
100%|██████████| 1/1 [00:00<00:00, 212.69it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 22847.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 547.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 533.02 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 208.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1420.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 3474.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 114.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.7747724652290344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.564, 'eval_steps_per_second': 11.564, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.74s/it]

{'eval_loss': 0.7052212953567505, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.851, 'eval_steps_per_second': 11.851, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.72s/it]

{'eval_loss': 0.6554700136184692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.609, 'eval_steps_per_second': 11.609, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.0715, 'train_samples_per_second': 3.9, 'train_steps_per_second': 0.509, 'train_loss': 0.6184890181929977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 161.99it/s]
100%|██████████| 1/1 [00:00<00:00, 295.92it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 84227.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 924.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 206.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7747724652290344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.419, 'eval_steps_per_second': 11.419, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.74s/it]

{'eval_loss': 0.7052212953567505, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.005, 'eval_steps_per_second': 12.005, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.74s/it]

{'eval_loss': 0.6554700136184692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.379, 'eval_steps_per_second': 12.379, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.1063, 'train_samples_per_second': 3.898, 'train_steps_per_second': 0.508, 'train_loss': 0.6184890181929977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 308.22it/s]
100%|██████████| 1/1 [00:00<00:00, 166.17it/s]
100%|██████████| 1/1 [00:00<00:00, 2716.52it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 204.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.7747724652290344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0915, 'eval_samples_per_second': 10.932, 'eval_steps_per_second': 10.932, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.72s/it]

{'eval_loss': 0.7052212953567505, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.103, 'eval_steps_per_second': 12.103, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.73s/it]

{'eval_loss': 0.6554700136184692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0756, 'eval_samples_per_second': 13.236, 'eval_steps_per_second': 13.236, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.0825, 'train_samples_per_second': 3.9, 'train_steps_per_second': 0.509, 'train_loss': 0.6184890181929977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 114.88it/s]
100%|██████████| 1/1 [00:00<00:00, 197.19it/s]
100%|██████████| 1/1 [00:00<00:00, 98.05it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 34502.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 973.61 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 203.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a 

{'eval_loss': 0.7747724652290344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.148, 'eval_steps_per_second': 12.148, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.74s/it]

{'eval_loss': 0.7052212953567505, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0872, 'eval_samples_per_second': 11.469, 'eval_steps_per_second': 11.469, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.73s/it]

{'eval_loss': 0.6554700136184692, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.005, 'eval_steps_per_second': 13.005, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.249, 'train_samples_per_second': 3.887, 'train_steps_per_second': 0.507, 'train_loss': 0.6184890181929977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 125.96it/s]
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
100%|██████████| 1/1 [00:00<00:00, 111.62it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 10240.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1044.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 208.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 588.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7752078771591187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.137, 'eval_steps_per_second': 12.137, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.78s/it]

{'eval_loss': 0.7046639919281006, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.173, 'eval_steps_per_second': 12.173, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.78s/it]

{'eval_loss': 0.6543404459953308, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.608, 'eval_steps_per_second': 11.608, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.6092, 'train_samples_per_second': 3.917, 'train_steps_per_second': 0.504, 'train_loss': 0.618104652122215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 1314.42it/s]
100%|██████████| 1/1 [00:00<00:00, 92.29it/s]
100%|██████████| 1/1 [00:00<00:00, 147.27it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 62534.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 213.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 100.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.7752078771591187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.31, 'eval_steps_per_second': 12.31, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.77s/it]

{'eval_loss': 0.7046639919281006, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.083, 'eval_steps_per_second': 13.083, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.78s/it]

{'eval_loss': 0.6543404459953308, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.016, 'eval_steps_per_second': 12.016, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.1448, 'train_samples_per_second': 3.951, 'train_steps_per_second': 0.508, 'train_loss': 0.618104652122215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 175.76it/s]
100%|██████████| 1/1 [00:00<00:00, 93.08it/s]
100%|██████████| 1/1 [00:00<00:00, 167.75it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 149.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 147.76 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 208.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1379.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 950.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN

{'eval_loss': 0.7752078771591187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0802, 'eval_samples_per_second': 12.462, 'eval_steps_per_second': 12.462, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.78s/it]

{'eval_loss': 0.7046639919281006, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.108, 'eval_steps_per_second': 13.108, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.77s/it]

{'eval_loss': 0.6543404459953308, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0756, 'eval_samples_per_second': 13.226, 'eval_steps_per_second': 13.226, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.5308, 'train_samples_per_second': 3.923, 'train_steps_per_second': 0.504, 'train_loss': 0.618104652122215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 85200.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 495.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 202.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 277.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 62.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7752078771591187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.159, 'eval_steps_per_second': 12.159, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.78s/it]

{'eval_loss': 0.7046639919281006, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0745, 'eval_samples_per_second': 13.416, 'eval_steps_per_second': 13.416, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.79s/it]

{'eval_loss': 0.6543404459953308, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.155, 'eval_steps_per_second': 12.155, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.8236, 'train_samples_per_second': 3.902, 'train_steps_per_second': 0.502, 'train_loss': 0.618104652122215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.49it/s]
100%|██████████| 1/1 [00:00<00:00, 143.57it/s]
100%|██████████| 1/1 [00:00<00:00, 172.58it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 976.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 967.77 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 202.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 110.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1210.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 112.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should proba

{'eval_loss': 0.7752078771591187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.842, 'eval_steps_per_second': 11.842, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.78s/it]

{'eval_loss': 0.7046639919281006, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.272, 'eval_steps_per_second': 12.272, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.77s/it]

{'eval_loss': 0.6543404459953308, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.128, 'eval_steps_per_second': 11.128, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.5705, 'train_samples_per_second': 3.92, 'train_steps_per_second': 0.504, 'train_loss': 0.618104652122215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 892.22it/s]
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
100%|██████████| 1/1 [00:00<00:00, 99.31it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 70903.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 193.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7748632431030273, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1111, 'eval_samples_per_second': 8.999, 'eval_steps_per_second': 8.999, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.83s/it]

{'eval_loss': 0.7042592167854309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.283, 'eval_steps_per_second': 12.283, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.85s/it]

{'eval_loss': 0.6542355418205261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.552, 'eval_steps_per_second': 11.552, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


{'train_runtime': 54.5517, 'train_samples_per_second': 3.905, 'train_steps_per_second': 0.495, 'train_loss': 0.6180395196985315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 99.95it/s]
100%|██████████| 1/1 [00:00<00:00, 165.66it/s]
100%|██████████| 1/1 [00:00<00:00, 132.22it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 10344.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1034.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 194.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 2918.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 145.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.7748632431030273, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.935, 'eval_steps_per_second': 12.935, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.82s/it]

{'eval_loss': 0.7042592167854309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.271, 'eval_steps_per_second': 12.271, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.82s/it]

{'eval_loss': 0.6542355418205261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.274, 'eval_steps_per_second': 12.274, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  2.00s/it]


{'train_runtime': 53.9904, 'train_samples_per_second': 3.945, 'train_steps_per_second': 0.5, 'train_loss': 0.6180395196985315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 169.76it/s]
100%|██████████| 1/1 [00:00<00:00, 144.56it/s]
100%|██████████| 1/1 [00:00<00:00, 145.87it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 9984.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 203.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 287.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 928.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 112.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7748632431030273, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.073, 'eval_samples_per_second': 13.692, 'eval_steps_per_second': 13.692, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.83s/it]

{'eval_loss': 0.7042592167854309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.804, 'eval_steps_per_second': 10.804, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.85s/it]

{'eval_loss': 0.6542355418205261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.868, 'eval_steps_per_second': 11.868, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


{'train_runtime': 54.4497, 'train_samples_per_second': 3.912, 'train_steps_per_second': 0.496, 'train_loss': 0.6180395196985315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.12it/s]
100%|██████████| 1/1 [00:00<00:00, 462.54it/s]
100%|██████████| 1/1 [00:00<00:00, 328.19it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 35519.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 902.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 842.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 270.84 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 198.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 109.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 4559.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7748632431030273, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.308, 'eval_steps_per_second': 12.308, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.83s/it]

{'eval_loss': 0.7042592167854309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0914, 'eval_samples_per_second': 10.941, 'eval_steps_per_second': 10.941, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.83s/it]

{'eval_loss': 0.6542355418205261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.585, 'eval_steps_per_second': 11.585, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.6141, 'train_samples_per_second': 3.973, 'train_steps_per_second': 0.504, 'train_loss': 0.6180395196985315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 165.74it/s]
100%|██████████| 1/1 [00:00<00:00, 172.34it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 495.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 208.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 349.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a

{'eval_loss': 0.7748632431030273, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0797, 'eval_samples_per_second': 12.549, 'eval_steps_per_second': 12.549, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.81s/it]

{'eval_loss': 0.7042592167854309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0883, 'eval_samples_per_second': 11.33, 'eval_steps_per_second': 11.33, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.91s/it]

{'eval_loss': 0.6542355418205261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.992, 'eval_steps_per_second': 11.992, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


{'train_runtime': 55.233, 'train_samples_per_second': 3.856, 'train_steps_per_second': 0.489, 'train_loss': 0.6180395196985315, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.76it/s]
100%|██████████| 1/1 [00:00<00:00, 84.56it/s]
100%|██████████| 1/1 [00:00<00:00, 167.72it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 35998.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 984.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 199.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this m

{'eval_loss': 0.7748396396636963, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0894, 'eval_samples_per_second': 11.192, 'eval_steps_per_second': 11.192, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.92s/it]

{'eval_loss': 0.7040574550628662, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.871, 'eval_steps_per_second': 11.871, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.88s/it]

{'eval_loss': 0.6538241505622864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.988, 'eval_steps_per_second': 11.988, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.0962, 'train_samples_per_second': 3.92, 'train_steps_per_second': 0.49, 'train_loss': 0.6178735097249349, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 36006.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.68 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 214.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7748396396636963, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0979, 'eval_samples_per_second': 10.217, 'eval_steps_per_second': 10.217, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:36<00:17,  1.95s/it]

{'eval_loss': 0.7040574550628662, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.779, 'eval_steps_per_second': 11.779, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.94s/it]

{'eval_loss': 0.6538241505622864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.07s/it]


{'train_runtime': 55.9353, 'train_samples_per_second': 3.862, 'train_steps_per_second': 0.483, 'train_loss': 0.6178735097249349, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 23996.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 203.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7748396396636963, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.92s/it]

{'eval_loss': 0.7040574550628662, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.852, 'eval_steps_per_second': 11.852, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.92s/it]

{'eval_loss': 0.6538241505622864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.813, 'eval_steps_per_second': 12.813, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.2129, 'train_samples_per_second': 3.912, 'train_steps_per_second': 0.489, 'train_loss': 0.6178735097249349, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 35998.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 205.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7748396396636963, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.92s/it]

{'eval_loss': 0.7040574550628662, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.749, 'eval_steps_per_second': 10.749, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.93s/it]

{'eval_loss': 0.6538241505622864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.06s/it]


{'train_runtime': 55.6917, 'train_samples_per_second': 3.878, 'train_steps_per_second': 0.485, 'train_loss': 0.6178735097249349, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 23722.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 208.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7748396396636963, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.91s/it]

{'eval_loss': 0.7040574550628662, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.086, 'eval_steps_per_second': 12.086, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.90s/it]

{'eval_loss': 0.6538241505622864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.647, 'eval_steps_per_second': 11.647, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.1572, 'train_samples_per_second': 3.916, 'train_steps_per_second': 0.49, 'train_loss': 0.6178735097249349, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 197.17it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 36524.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 210.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7451224327087402, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.38, 'eval_steps_per_second': 11.38, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.54s/it]

{'eval_loss': 0.677827000617981, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.58s/it]

{'eval_loss': 0.6244672536849976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.801, 'eval_steps_per_second': 11.801, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.2402, 'train_samples_per_second': 3.826, 'train_steps_per_second': 0.524, 'train_loss': 0.5649354298909505, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 164.65it/s]
100%|██████████| 1/1 [00:00<00:00, 76.16it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 36537.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 203.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.7638424634933472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.37, 'eval_steps_per_second': 11.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.54s/it]

{'eval_loss': 0.6347256898880005, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:55<00:00,  1.52s/it]

{'eval_loss': 0.5611706376075745, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.2575, 'train_samples_per_second': 3.825, 'train_steps_per_second': 0.524, 'train_loss': 0.7061165491739909, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 82.49it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 72883.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 208.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.7638424634933472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.268, 'eval_steps_per_second': 12.268, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.57s/it]

{'eval_loss': 0.6347256898880005, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.56s/it]

{'eval_loss': 0.5611706376075745, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.077, 'eval_samples_per_second': 12.994, 'eval_steps_per_second': 12.994, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.4992, 'train_samples_per_second': 3.809, 'train_steps_per_second': 0.522, 'train_loss': 0.7061165491739909, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.07it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 36498.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 211.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7638424634933472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.19, 'eval_steps_per_second': 13.19, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.54s/it]

{'eval_loss': 0.6347256898880005, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.54s/it]

{'eval_loss': 0.5611706376075745, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.687, 'eval_steps_per_second': 12.687, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.5583, 'train_samples_per_second': 3.805, 'train_steps_per_second': 0.521, 'train_loss': 0.7061165491739909, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 36502.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 207.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 489.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7638424634933472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.372, 'eval_steps_per_second': 11.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.55s/it]

{'eval_loss': 0.6347256898880005, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.53s/it]

{'eval_loss': 0.5611706376075745, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.2961, 'train_samples_per_second': 3.822, 'train_steps_per_second': 0.524, 'train_loss': 0.7061165491739909, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 35998.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 198.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.762380838394165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.082, 'eval_steps_per_second': 12.082, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.59s/it]

{'eval_loss': 0.6341342329978943, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.359, 'eval_steps_per_second': 12.359, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.59s/it]

{'eval_loss': 0.5583419799804688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.93s/it]


{'train_runtime': 57.9206, 'train_samples_per_second': 3.833, 'train_steps_per_second': 0.518, 'train_loss': 0.7031927108764648, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
100%|██████████| 1/1 [00:00<00:00, 151.91it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 37015.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 966.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 200.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 309.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.762380838394165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.61s/it]

{'eval_loss': 0.6341342329978943, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.60s/it]

{'eval_loss': 0.5583419799804688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.0635, 'train_samples_per_second': 3.823, 'train_steps_per_second': 0.517, 'train_loss': 0.7031927108764648, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 37020.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 203.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 476.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.762380838394165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.59s/it]

{'eval_loss': 0.6341342329978943, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.61s/it]

{'eval_loss': 0.5583419799804688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.933, 'eval_steps_per_second': 11.933, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.6951, 'train_samples_per_second': 3.848, 'train_steps_per_second': 0.52, 'train_loss': 0.7031927108764648, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 152.27it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 36998.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 201.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.762380838394165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:15,  1.60s/it]

{'eval_loss': 0.6341342329978943, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.63s/it]

{'eval_loss': 0.5583419799804688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.9048, 'train_samples_per_second': 3.769, 'train_steps_per_second': 0.509, 'train_loss': 0.7031927108764648, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 74023.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 199.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 489.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.762380838394165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.56s/it]

{'eval_loss': 0.6341342329978943, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.849, 'eval_steps_per_second': 11.849, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:55<00:00,  1.55s/it]

{'eval_loss': 0.5583419799804688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.382, 'eval_steps_per_second': 11.382, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.90s/it]


{'train_runtime': 56.8815, 'train_samples_per_second': 3.903, 'train_steps_per_second': 0.527, 'train_loss': 0.7031927108764648, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 165.28it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 25043.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 192.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7614359855651855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1248, 'eval_samples_per_second': 8.011, 'eval_steps_per_second': 8.011, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]

{'eval_loss': 0.6315677762031555, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1253, 'eval_samples_per_second': 7.981, 'eval_steps_per_second': 7.981, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.62s/it]

{'eval_loss': 0.5553551912307739, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1328, 'eval_samples_per_second': 7.531, 'eval_steps_per_second': 7.531, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.93s/it]


{'train_runtime': 57.8683, 'train_samples_per_second': 3.888, 'train_steps_per_second': 0.518, 'train_loss': 0.7017550150553385, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 37507.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 203.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7614359855651855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1081, 'eval_samples_per_second': 9.247, 'eval_steps_per_second': 9.247, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]

{'eval_loss': 0.6315677762031555, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1042, 'eval_samples_per_second': 9.598, 'eval_steps_per_second': 9.598, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.64s/it]

{'eval_loss': 0.5553551912307739, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1128, 'eval_samples_per_second': 8.863, 'eval_steps_per_second': 8.863, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.1748, 'train_samples_per_second': 3.868, 'train_steps_per_second': 0.516, 'train_loss': 0.7017550150553385, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 37493.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 200.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7614359855651855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.52, 'eval_steps_per_second': 12.52, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.59s/it]

{'eval_loss': 0.6315677762031555, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1353, 'eval_samples_per_second': 7.388, 'eval_steps_per_second': 7.388, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:55<00:00,  1.64s/it]

{'eval_loss': 0.5553551912307739, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1289, 'eval_samples_per_second': 7.759, 'eval_steps_per_second': 7.759, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.2011, 'train_samples_per_second': 3.933, 'train_steps_per_second': 0.524, 'train_loss': 0.7017550150553385, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 189.76it/s]
100%|██████████| 1/1 [00:00<00:00, 90.40it/s]
100%|██████████| 1/1 [00:00<00:00, 84.61it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 37449.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 199.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7614359855651855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.116, 'eval_samples_per_second': 8.617, 'eval_steps_per_second': 8.617, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.63s/it]

{'eval_loss': 0.6315677762031555, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1103, 'eval_samples_per_second': 9.069, 'eval_steps_per_second': 9.069, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.63s/it]

{'eval_loss': 0.5553551912307739, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1155, 'eval_samples_per_second': 8.656, 'eval_steps_per_second': 8.656, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.93s/it]


{'train_runtime': 57.9517, 'train_samples_per_second': 3.883, 'train_steps_per_second': 0.518, 'train_loss': 0.7017550150553385, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.17it/s]
100%|██████████| 1/1 [00:00<00:00, 140.32it/s]
100%|██████████| 1/1 [00:00<00:00, 268.44it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 24300.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 145.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 199.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 343.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 303.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7614359855651855, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1317, 'eval_samples_per_second': 7.594, 'eval_steps_per_second': 7.594, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.65s/it]

{'eval_loss': 0.6315677762031555, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1048, 'eval_samples_per_second': 9.539, 'eval_steps_per_second': 9.539, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.63s/it]

{'eval_loss': 0.5553551912307739, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1389, 'eval_samples_per_second': 7.197, 'eval_steps_per_second': 7.197, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.93s/it]


{'train_runtime': 57.9376, 'train_samples_per_second': 3.883, 'train_steps_per_second': 0.518, 'train_loss': 0.7017550150553385, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 125.40it/s]
100%|██████████| 1/1 [00:00<00:00, 142.51it/s]
100%|██████████| 1/1 [00:00<00:00, 90.48it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 4766.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 198.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 568.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mod

{'eval_loss': 0.7612231969833374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.851, 'eval_steps_per_second': 11.851, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.67s/it]

{'eval_loss': 0.6289168000221252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.997, 'eval_steps_per_second': 11.997, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.65s/it]

{'eval_loss': 0.5541137456893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.131, 'eval_steps_per_second': 12.131, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.93s/it]


{'train_runtime': 57.8076, 'train_samples_per_second': 3.944, 'train_steps_per_second': 0.519, 'train_loss': 0.7006245930989583, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.59it/s]
100%|██████████| 1/1 [00:00<00:00, 89.62it/s]
100%|██████████| 1/1 [00:00<00:00, 167.04it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 978.15 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 199.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 1112.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a 

{'eval_loss': 0.7612231969833374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.969, 'eval_steps_per_second': 11.969, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.68s/it]

{'eval_loss': 0.6289168000221252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.812, 'eval_steps_per_second': 12.812, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.71s/it]

{'eval_loss': 0.5541137456893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0952, 'eval_samples_per_second': 10.504, 'eval_steps_per_second': 10.504, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.7219, 'train_samples_per_second': 3.883, 'train_steps_per_second': 0.511, 'train_loss': 0.7006245930989583, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 273.71it/s]
100%|██████████| 1/1 [00:00<00:00, 125.51it/s]
100%|██████████| 1/1 [00:00<00:00, 147.25it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 198.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 359.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.7612231969833374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1004, 'eval_samples_per_second': 9.96, 'eval_steps_per_second': 9.96, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.72s/it]

{'eval_loss': 0.6289168000221252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.989, 'eval_steps_per_second': 11.989, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.67s/it]

{'eval_loss': 0.5541137456893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 11.999, 'eval_steps_per_second': 11.999, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.97s/it]


{'train_runtime': 59.1541, 'train_samples_per_second': 3.854, 'train_steps_per_second': 0.507, 'train_loss': 0.7006245930989583, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 150.25it/s]
100%|██████████| 1/1 [00:00<00:00, 162.19it/s]
100%|██████████| 1/1 [00:00<00:00, 140.98it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 191.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a 

{'eval_loss': 0.7612231969833374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.558, 'eval_steps_per_second': 12.558, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.67s/it]

{'eval_loss': 0.6289168000221252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.157, 'eval_steps_per_second': 11.157, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.67s/it]

{'eval_loss': 0.5541137456893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.209, 'eval_steps_per_second': 11.209, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.6797, 'train_samples_per_second': 3.953, 'train_steps_per_second': 0.52, 'train_loss': 0.7006245930989583, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 113.71it/s]
100%|██████████| 1/1 [00:00<00:00, 114.87it/s]
100%|██████████| 1/1 [00:00<00:00, 143.48it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 11103.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 948.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 191.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 149.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TR

{'eval_loss': 0.7612231969833374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:16,  1.69s/it]

{'eval_loss': 0.6289168000221252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.698, 'eval_steps_per_second': 11.698, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.69s/it]

{'eval_loss': 0.5541137456893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.362, 'eval_steps_per_second': 13.362, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.7889, 'train_samples_per_second': 3.878, 'train_steps_per_second': 0.51, 'train_loss': 0.7006245930989583, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.73it/s]
100%|██████████| 1/1 [00:00<00:00, 164.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25642.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 185.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 317.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7604514360427856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0795, 'eval_samples_per_second': 12.583, 'eval_steps_per_second': 12.583, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.71s/it]

{'eval_loss': 0.6272217035293579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.73s/it]

{'eval_loss': 0.5532033443450928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.95s/it]


{'train_runtime': 58.4087, 'train_samples_per_second': 3.955, 'train_steps_per_second': 0.514, 'train_loss': 0.6998313268025717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 37711.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 188.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 486.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7604514360427856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.233, 'eval_steps_per_second': 12.233, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.74s/it]

{'eval_loss': 0.6272217035293579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.981, 'eval_steps_per_second': 11.981, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.76s/it]

{'eval_loss': 0.5532033443450928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.015, 'eval_steps_per_second': 13.015, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3065, 'train_samples_per_second': 3.895, 'train_steps_per_second': 0.506, 'train_loss': 0.6998313268025717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 38548.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 178.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.7604514360427856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.344, 'eval_steps_per_second': 13.344, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.72s/it]

{'eval_loss': 0.6272217035293579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.914, 'eval_steps_per_second': 11.914, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.73s/it]

{'eval_loss': 0.5532033443450928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.84, 'eval_steps_per_second': 12.84, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.8547, 'train_samples_per_second': 3.925, 'train_steps_per_second': 0.51, 'train_loss': 0.6998313268025717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
100%|██████████| 1/1 [00:00<00:00, 163.53it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25668.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 186.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7604514360427856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.74s/it]

{'eval_loss': 0.6272217035293579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.377, 'eval_steps_per_second': 11.377, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.74s/it]

{'eval_loss': 0.5532033443450928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.351, 'eval_steps_per_second': 13.351, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.7582, 'train_samples_per_second': 3.931, 'train_steps_per_second': 0.511, 'train_loss': 0.6998313268025717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 37349.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 949.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 187.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7604514360427856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.72s/it]

{'eval_loss': 0.6272217035293579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.73s/it]

{'eval_loss': 0.5532033443450928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.3477, 'train_samples_per_second': 3.959, 'train_steps_per_second': 0.514, 'train_loss': 0.6998313268025717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 38313.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 184.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7610437273979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.794, 'eval_steps_per_second': 11.794, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.78s/it]

{'eval_loss': 0.6267425417900085, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.78s/it]

{'eval_loss': 0.5527058839797974, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.6547, 'train_samples_per_second': 3.923, 'train_steps_per_second': 0.503, 'train_loss': 0.6996554056803386, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 165.13it/s]
100%|██████████| 1/1 [00:00<00:00, 165.13it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 38998.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 185.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7610437273979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.84, 'eval_steps_per_second': 12.84, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.77s/it]

{'eval_loss': 0.6267425417900085, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.78s/it]

{'eval_loss': 0.5527058839797974, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.4143, 'train_samples_per_second': 3.938, 'train_steps_per_second': 0.505, 'train_loss': 0.6996554056803386, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.12it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 38326.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 174.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7610437273979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.993, 'eval_steps_per_second': 10.993, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.83s/it]

{'eval_loss': 0.6267425417900085, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.78s/it]

{'eval_loss': 0.5527058839797974, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.3075, 'train_samples_per_second': 3.88, 'train_steps_per_second': 0.497, 'train_loss': 0.6996554056803386, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 39002.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 182.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 498.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7610437273979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.77s/it]

{'eval_loss': 0.6267425417900085, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.77s/it]

{'eval_loss': 0.5527058839797974, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.779, 'eval_steps_per_second': 11.779, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.8336, 'train_samples_per_second': 3.977, 'train_steps_per_second': 0.51, 'train_loss': 0.6996554056803386, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 146.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 38210.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 193.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7610437273979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.78s/it]

{'eval_loss': 0.6267425417900085, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.177, 'eval_steps_per_second': 13.177, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.79s/it]

{'eval_loss': 0.5527058839797974, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.6481, 'train_samples_per_second': 3.923, 'train_steps_per_second': 0.503, 'train_loss': 0.6996554056803386, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 164.96it/s]
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 39507.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 178.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7617278695106506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.007, 'eval_steps_per_second': 12.007, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.82s/it]

{'eval_loss': 0.6261718273162842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.285, 'eval_steps_per_second': 12.285, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.82s/it]

{'eval_loss': 0.5528945922851562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.99, 'eval_steps_per_second': 11.99, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.7166, 'train_samples_per_second': 3.969, 'train_steps_per_second': 0.502, 'train_loss': 0.699321428934733, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 39498.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 188.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7617278695106506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.85s/it]

{'eval_loss': 0.6261718273162842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.529, 'eval_steps_per_second': 12.529, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.84s/it]

{'eval_loss': 0.5528945922851562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.00s/it]


{'train_runtime': 60.1383, 'train_samples_per_second': 3.941, 'train_steps_per_second': 0.499, 'train_loss': 0.699321428934733, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 38754.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 182.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 322.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7617278695106506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.67, 'eval_steps_per_second': 12.67, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.82s/it]

{'eval_loss': 0.6261718273162842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0912, 'eval_samples_per_second': 10.964, 'eval_steps_per_second': 10.964, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.82s/it]

{'eval_loss': 0.5528945922851562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3478, 'train_samples_per_second': 3.993, 'train_steps_per_second': 0.505, 'train_loss': 0.699321428934733, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 164.96it/s]
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 39484.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.36 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 184.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.7617278695106506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.85s/it]

{'eval_loss': 0.6261718273162842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.85s/it]

{'eval_loss': 0.5528945922851562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.687, 'eval_steps_per_second': 12.687, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.5668, 'train_samples_per_second': 3.913, 'train_steps_per_second': 0.495, 'train_loss': 0.699321428934733, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 37030.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 188.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7617278695106506, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.82s/it]

{'eval_loss': 0.6261718273162842, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.789, 'eval_steps_per_second': 11.789, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.82s/it]

{'eval_loss': 0.5528945922851562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.783, 'eval_steps_per_second': 11.783, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.7306, 'train_samples_per_second': 3.968, 'train_steps_per_second': 0.502, 'train_loss': 0.699321428934733, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 26664.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 177.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7620755434036255, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.89s/it]

{'eval_loss': 0.6263332962989807, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.621, 'eval_steps_per_second': 11.621, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.88s/it]

{'eval_loss': 0.5533432960510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.652, 'eval_steps_per_second': 11.652, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.3248, 'train_samples_per_second': 3.978, 'train_steps_per_second': 0.497, 'train_loss': 0.6991659164428711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 200.03it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 39189.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 178.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7620755434036255, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.051, 'eval_steps_per_second': 12.051, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.88s/it]

{'eval_loss': 0.6263332962989807, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.638, 'eval_steps_per_second': 11.638, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.87s/it]

{'eval_loss': 0.5533432960510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.855, 'eval_steps_per_second': 12.855, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.2191, 'train_samples_per_second': 3.985, 'train_steps_per_second': 0.498, 'train_loss': 0.6991659164428711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 26329.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 179.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7620755434036255, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.89s/it]

{'eval_loss': 0.6263332962989807, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.89s/it]

{'eval_loss': 0.5533432960510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.358, 'eval_steps_per_second': 13.358, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.03s/it]


{'train_runtime': 60.9036, 'train_samples_per_second': 3.941, 'train_steps_per_second': 0.493, 'train_loss': 0.6991659164428711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 26681.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 172.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7620755434036255, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.263, 'eval_steps_per_second': 12.263, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.85s/it]

{'eval_loss': 0.6263332962989807, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.90s/it]

{'eval_loss': 0.5533432960510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.706, 'train_samples_per_second': 3.953, 'train_steps_per_second': 0.494, 'train_loss': 0.6991659164428711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 40021.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 181.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7620755434036255, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.14, 'eval_steps_per_second': 12.14, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.89s/it]

{'eval_loss': 0.6263332962989807, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.91s/it]

{'eval_loss': 0.5533432960510254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.85, 'eval_steps_per_second': 12.85, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.4858, 'train_samples_per_second': 3.968, 'train_steps_per_second': 0.496, 'train_loss': 0.6991659164428711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 26573.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 177.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7490140795707703, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.858, 'eval_steps_per_second': 12.858, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:39<00:16,  1.51s/it]

{'eval_loss': 0.5723060965538025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.286, 'eval_steps_per_second': 12.286, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:00<00:00,  1.53s/it]

{'eval_loss': 0.5272695422172546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.022, 'eval_steps_per_second': 13.022, 'epoch': 3.0}


100%|██████████| 33/33 [01:01<00:00,  1.87s/it]


{'train_runtime': 61.7882, 'train_samples_per_second': 3.933, 'train_steps_per_second': 0.534, 'train_loss': 0.6401262572317412, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.80it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 40517.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 176.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 320.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6825413107872009, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0772, 'eval_samples_per_second': 12.959, 'eval_steps_per_second': 12.959, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:16,  1.53s/it]

{'eval_loss': 0.5410040020942688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.511, 'eval_steps_per_second': 11.511, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.57s/it]

{'eval_loss': 0.5098768472671509, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.248, 'eval_steps_per_second': 11.248, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.91s/it]


{'train_runtime': 62.9975, 'train_samples_per_second': 3.857, 'train_steps_per_second': 0.524, 'train_loss': 0.6156285025856711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
100%|██████████| 1/1 [00:00<00:00, 82.54it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 39555.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 180.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.6825413107872009, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.983, 'eval_steps_per_second': 11.983, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:17,  1.56s/it]

{'eval_loss': 0.5410040020942688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.64s/it]

{'eval_loss': 0.5098768472671509, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0995, 'eval_samples_per_second': 10.049, 'eval_steps_per_second': 10.049, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.2575, 'train_samples_per_second': 3.724, 'train_steps_per_second': 0.506, 'train_loss': 0.6156285025856711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.73it/s]
100%|██████████| 1/1 [00:00<00:00, 159.38it/s]
100%|██████████| 1/1 [00:00<00:00, 144.81it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 30801.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 719.43 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 172.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 551.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 338.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.6825413107872009, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:17,  1.61s/it]

{'eval_loss': 0.5410040020942688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.839, 'eval_steps_per_second': 12.839, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.57s/it]

{'eval_loss': 0.5098768472671509, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.97s/it]


{'train_runtime': 64.9776, 'train_samples_per_second': 3.74, 'train_steps_per_second': 0.508, 'train_loss': 0.6156285025856711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.03it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 27016.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 168.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6825413107872009, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.124, 'eval_steps_per_second': 12.124, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:17,  1.59s/it]

{'eval_loss': 0.5410040020942688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0894, 'eval_samples_per_second': 11.182, 'eval_steps_per_second': 11.182, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.56s/it]

{'eval_loss': 0.5098768472671509, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0754, 'eval_samples_per_second': 13.271, 'eval_steps_per_second': 13.271, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.2792, 'train_samples_per_second': 3.78, 'train_steps_per_second': 0.513, 'train_loss': 0.6156285025856711, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 200.02it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 26736.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.75 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 176.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6831223964691162, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.278, 'eval_steps_per_second': 12.278, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:17,  1.59s/it]

{'eval_loss': 0.5394653081893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1013, 'eval_samples_per_second': 9.869, 'eval_steps_per_second': 9.869, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.63s/it]

{'eval_loss': 0.5111249089241028, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.3592, 'train_samples_per_second': 3.822, 'train_steps_per_second': 0.513, 'train_loss': 0.6132023551247336, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 39738.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 174.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.6831223964691162, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.68s/it]

{'eval_loss': 0.5394653081893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.753, 'eval_steps_per_second': 11.753, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.60s/it]

{'eval_loss': 0.5111249089241028, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0922, 'eval_samples_per_second': 10.843, 'eval_steps_per_second': 10.843, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.7956, 'train_samples_per_second': 3.739, 'train_steps_per_second': 0.502, 'train_loss': 0.6132023551247336, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 169.58it/s]
100%|██████████| 1/1 [00:00<00:00, 130.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.82it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 82280.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 174.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 252.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 229.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.6831223964691162, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.154, 'eval_steps_per_second': 12.154, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:17,  1.63s/it]

{'eval_loss': 0.5394653081893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.268, 'eval_steps_per_second': 12.268, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.63s/it]

{'eval_loss': 0.5111249089241028, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.837, 'eval_steps_per_second': 12.837, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.97s/it]


{'train_runtime': 64.9669, 'train_samples_per_second': 3.787, 'train_steps_per_second': 0.508, 'train_loss': 0.6132023551247336, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 41007.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.54 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 186.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6831223964691162, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:17,  1.58s/it]

{'eval_loss': 0.5394653081893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.61s/it]

{'eval_loss': 0.5111249089241028, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.846, 'eval_steps_per_second': 12.846, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.7218, 'train_samples_per_second': 3.861, 'train_steps_per_second': 0.518, 'train_loss': 0.6132023551247336, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 39968.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 181.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.6831223964691162, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.849, 'eval_steps_per_second': 12.849, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.65s/it]

{'eval_loss': 0.5394653081893921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.61s/it]

{'eval_loss': 0.5111249089241028, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.3476, 'train_samples_per_second': 3.823, 'train_steps_per_second': 0.513, 'train_loss': 0.6132023551247336, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.27it/s]
100%|██████████| 1/1 [00:00<00:00, 166.22it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 27684.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 176.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.6811578869819641, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1365, 'eval_samples_per_second': 7.328, 'eval_steps_per_second': 7.328, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.64s/it]

{'eval_loss': 0.5375601053237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1212, 'eval_samples_per_second': 8.252, 'eval_steps_per_second': 8.252, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.65s/it]

{'eval_loss': 0.5109031200408936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1362, 'eval_samples_per_second': 7.344, 'eval_steps_per_second': 7.344, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.94s/it]


{'train_runtime': 64.182, 'train_samples_per_second': 3.88, 'train_steps_per_second': 0.514, 'train_loss': 0.6124967517274799, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.55it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 41488.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 902.97 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 176.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6811578869819641, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.153, 'eval_samples_per_second': 6.534, 'eval_steps_per_second': 6.534, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.69s/it]

{'eval_loss': 0.5375601053237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1309, 'eval_samples_per_second': 7.638, 'eval_steps_per_second': 7.638, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.67s/it]

{'eval_loss': 0.5109031200408936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1311, 'eval_samples_per_second': 7.625, 'eval_steps_per_second': 7.625, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.97s/it]


{'train_runtime': 65.1287, 'train_samples_per_second': 3.823, 'train_steps_per_second': 0.507, 'train_loss': 0.6124967517274799, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 41468.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 177.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6811578869819641, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1414, 'eval_samples_per_second': 7.073, 'eval_steps_per_second': 7.073, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.67s/it]

{'eval_loss': 0.5375601053237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1269, 'eval_samples_per_second': 7.878, 'eval_steps_per_second': 7.878, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.64s/it]

{'eval_loss': 0.5109031200408936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1314, 'eval_samples_per_second': 7.61, 'eval_steps_per_second': 7.61, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.96s/it]


{'train_runtime': 64.8269, 'train_samples_per_second': 3.841, 'train_steps_per_second': 0.509, 'train_loss': 0.6124967517274799, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.15it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 40517.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 176.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6811578869819641, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1286, 'eval_samples_per_second': 7.777, 'eval_steps_per_second': 7.777, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.67s/it]

{'eval_loss': 0.5375601053237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.125, 'eval_samples_per_second': 8.002, 'eval_steps_per_second': 8.002, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.67s/it]

{'eval_loss': 0.5109031200408936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1266, 'eval_samples_per_second': 7.9, 'eval_steps_per_second': 7.9, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.96s/it]


{'train_runtime': 64.8202, 'train_samples_per_second': 3.841, 'train_steps_per_second': 0.509, 'train_loss': 0.6124967517274799, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 41507.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 179.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6811578869819641, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1302, 'eval_samples_per_second': 7.678, 'eval_steps_per_second': 7.678, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:17,  1.61s/it]

{'eval_loss': 0.5375601053237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.535, 'eval_steps_per_second': 12.535, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.64s/it]

{'eval_loss': 0.5109031200408936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1528, 'eval_samples_per_second': 6.544, 'eval_steps_per_second': 6.544, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.8138, 'train_samples_per_second': 3.902, 'train_steps_per_second': 0.517, 'train_loss': 0.6124967517274799, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 41983.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 176.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.6815593242645264, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.64, 'eval_steps_per_second': 11.64, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.70s/it]

{'eval_loss': 0.5364655256271362, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.72s/it]

{'eval_loss': 0.5106485486030579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.85, 'eval_steps_per_second': 12.85, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.97s/it]


{'train_runtime': 64.9618, 'train_samples_per_second': 3.879, 'train_steps_per_second': 0.508, 'train_loss': 0.6115992574980764, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
100%|██████████| 1/1 [00:00<00:00, 141.82it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 42043.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 173.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.6815593242645264, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.38, 'eval_steps_per_second': 12.38, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.70s/it]

{'eval_loss': 0.5364655256271362, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0954, 'eval_samples_per_second': 10.481, 'eval_steps_per_second': 10.481, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.70s/it]

{'eval_loss': 0.5106485486030579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.3924, 'train_samples_per_second': 3.914, 'train_steps_per_second': 0.512, 'train_loss': 0.6115992574980764, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 42008.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 176.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 322.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6815593242645264, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.228, 'eval_steps_per_second': 12.228, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.72s/it]

{'eval_loss': 0.5364655256271362, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.70s/it]

{'eval_loss': 0.5106485486030579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.099, 'eval_steps_per_second': 13.099, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.3054, 'train_samples_per_second': 3.859, 'train_steps_per_second': 0.505, 'train_loss': 0.6115992574980764, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 41983.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 176.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6815593242645264, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.68s/it]

{'eval_loss': 0.5364655256271362, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.913, 'eval_steps_per_second': 12.913, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.70s/it]

{'eval_loss': 0.5106485486030579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.211, 'train_samples_per_second': 3.925, 'train_steps_per_second': 0.514, 'train_loss': 0.6115992574980764, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 42038.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 179.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6815593242645264, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.71s/it]

{'eval_loss': 0.5364655256271362, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0949, 'eval_samples_per_second': 10.534, 'eval_steps_per_second': 10.534, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.70s/it]

{'eval_loss': 0.5106485486030579, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.97s/it]


{'train_runtime': 65.1012, 'train_samples_per_second': 3.871, 'train_steps_per_second': 0.507, 'train_loss': 0.6115992574980764, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 28335.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 173.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6821966171264648, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.346, 'eval_steps_per_second': 12.346, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.75s/it]

{'eval_loss': 0.5363342761993408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.76s/it]

{'eval_loss': 0.5129222273826599, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.3589, 'train_samples_per_second': 3.902, 'train_steps_per_second': 0.505, 'train_loss': 0.6112860477331913, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 42518.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 175.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6821966171264648, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0893, 'eval_samples_per_second': 11.197, 'eval_steps_per_second': 11.197, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.78s/it]

{'eval_loss': 0.5363342761993408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.867, 'eval_steps_per_second': 11.867, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.76s/it]

{'eval_loss': 0.5129222273826599, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.847, 'eval_steps_per_second': 12.847, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9044, 'train_samples_per_second': 3.869, 'train_steps_per_second': 0.501, 'train_loss': 0.6112860477331913, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 173.38it/s]
100%|██████████| 1/1 [00:00<00:00, 163.99it/s]
100%|██████████| 1/1 [00:00<00:00, 197.22it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 41712.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 175.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.6821966171264648, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.27, 'eval_steps_per_second': 12.27, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.77s/it]

{'eval_loss': 0.5363342761993408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.364, 'eval_steps_per_second': 11.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.75s/it]

{'eval_loss': 0.5129222273826599, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.4594, 'train_samples_per_second': 3.896, 'train_steps_per_second': 0.504, 'train_loss': 0.6112860477331913, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 42492.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 174.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6821966171264648, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.77s/it]

{'eval_loss': 0.5363342761993408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.79s/it]

{'eval_loss': 0.5129222273826599, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.002, 'eval_steps_per_second': 13.002, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.2455, 'train_samples_per_second': 3.849, 'train_steps_per_second': 0.498, 'train_loss': 0.6112860477331913, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 197.53it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 41431.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 174.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6821966171264648, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.94, 'eval_steps_per_second': 11.94, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.75s/it]

{'eval_loss': 0.5363342761993408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.74s/it]

{'eval_loss': 0.5129222273826599, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.893, 'eval_steps_per_second': 11.893, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.97s/it]


{'train_runtime': 65.1096, 'train_samples_per_second': 3.916, 'train_steps_per_second': 0.507, 'train_loss': 0.6112860477331913, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.85it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 43023.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 174.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6814776062965393, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.849, 'eval_steps_per_second': 11.849, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.82s/it]

{'eval_loss': 0.5359527468681335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.518, 'eval_steps_per_second': 12.518, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.80s/it]

{'eval_loss': 0.5112550854682922, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.822, 'eval_steps_per_second': 12.822, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.00s/it]


{'train_runtime': 66.1592, 'train_samples_per_second': 3.9, 'train_steps_per_second': 0.499, 'train_loss': 0.6110450282241359, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.28it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 28215.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 175.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6814776062965393, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.231, 'eval_steps_per_second': 12.231, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.78s/it]

{'eval_loss': 0.5359527468681335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.306, 'eval_steps_per_second': 12.306, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.81s/it]

{'eval_loss': 0.5112550854682922, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.08, 'eval_samples_per_second': 12.499, 'eval_steps_per_second': 12.499, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.3224, 'train_samples_per_second': 3.95, 'train_steps_per_second': 0.505, 'train_loss': 0.6110450282241359, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 42277.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 172.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 501.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6814776062965393, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.82s/it]

{'eval_loss': 0.5359527468681335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.83s/it]

{'eval_loss': 0.5112550854682922, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.351, 'eval_steps_per_second': 13.351, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.02s/it]


{'train_runtime': 66.5031, 'train_samples_per_second': 3.88, 'train_steps_per_second': 0.496, 'train_loss': 0.6110450282241359, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
100%|██████████| 1/1 [00:00<00:00, 141.82it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 43018.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 173.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6814776062965393, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.81s/it]

{'eval_loss': 0.5359527468681335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.55, 'eval_steps_per_second': 13.55, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.82s/it]

{'eval_loss': 0.5112550854682922, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.202, 'eval_steps_per_second': 13.202, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.6175, 'train_samples_per_second': 3.932, 'train_steps_per_second': 0.503, 'train_loss': 0.6110450282241359, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 43018.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 169.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6814776062965393, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.673, 'eval_steps_per_second': 11.673, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.82s/it]

{'eval_loss': 0.5359527468681335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.83s/it]

{'eval_loss': 0.5112550854682922, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.882, 'eval_steps_per_second': 11.882, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.4194, 'train_samples_per_second': 3.884, 'train_steps_per_second': 0.497, 'train_loss': 0.6110450282241359, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 28988.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 170.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.6813478469848633, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.032, 'eval_steps_per_second': 13.032, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.83s/it]

{'eval_loss': 0.5352729558944702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.38, 'eval_steps_per_second': 12.38, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.85s/it]

{'eval_loss': 0.5104317665100098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.112, 'eval_steps_per_second': 13.112, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9153, 'train_samples_per_second': 3.96, 'train_steps_per_second': 0.501, 'train_loss': 0.6106576630563447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 196.90it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 43472.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 171.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6813478469848633, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.375, 'eval_steps_per_second': 12.375, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.86s/it]

{'eval_loss': 0.5352729558944702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.86s/it]

{'eval_loss': 0.5104317665100098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.013, 'eval_steps_per_second': 13.013, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.2053, 'train_samples_per_second': 3.942, 'train_steps_per_second': 0.498, 'train_loss': 0.6106576630563447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 43446.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 173.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6813478469848633, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.703, 'eval_steps_per_second': 11.703, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]

{'eval_loss': 0.5352729558944702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.181, 'eval_steps_per_second': 12.181, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.85s/it]

{'eval_loss': 0.5104317665100098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.282, 'eval_steps_per_second': 12.282, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.00s/it]


{'train_runtime': 66.0494, 'train_samples_per_second': 3.952, 'train_steps_per_second': 0.5, 'train_loss': 0.6106576630563447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.06it/s]
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 43482.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 170.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 346.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.6813478469848633, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.386, 'eval_steps_per_second': 12.386, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.85s/it]

{'eval_loss': 0.5352729558944702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.771, 'eval_steps_per_second': 11.771, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.88s/it]

{'eval_loss': 0.5104317665100098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.994, 'eval_steps_per_second': 11.994, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.02s/it]


{'train_runtime': 66.6672, 'train_samples_per_second': 3.915, 'train_steps_per_second': 0.495, 'train_loss': 0.6106576630563447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 43492.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 175.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6813478469848633, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.85s/it]

{'eval_loss': 0.5352729558944702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 12.996, 'eval_steps_per_second': 12.996, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.84s/it]

{'eval_loss': 0.5104317665100098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.238, 'eval_steps_per_second': 12.238, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9699, 'train_samples_per_second': 3.956, 'train_steps_per_second': 0.5, 'train_loss': 0.6106576630563447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 28844.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 172.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.6814780831336975, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.90s/it]

{'eval_loss': 0.53522789478302, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.88s/it]

{'eval_loss': 0.5102000832557678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.00s/it]


{'train_runtime': 66.0813, 'train_samples_per_second': 3.995, 'train_steps_per_second': 0.499, 'train_loss': 0.6103414477724017, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 141.10it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 29330.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 169.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.6814780831336975, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.89s/it]

{'eval_loss': 0.53522789478302, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.026, 'eval_steps_per_second': 12.026, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.89s/it]

{'eval_loss': 0.5102000832557678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.2371, 'train_samples_per_second': 3.986, 'train_steps_per_second': 0.498, 'train_loss': 0.6103414477724017, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.62it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 44045.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 167.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream tas

{'eval_loss': 0.6814780831336975, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.196, 'eval_steps_per_second': 12.196, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.88s/it]

{'eval_loss': 0.53522789478302, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.729, 'eval_steps_per_second': 11.729, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.87s/it]

{'eval_loss': 0.5102000832557678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.6165, 'train_samples_per_second': 4.023, 'train_steps_per_second': 0.503, 'train_loss': 0.6103414477724017, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 42928.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 166.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.6814780831336975, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.378, 'eval_steps_per_second': 12.378, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.89s/it]

{'eval_loss': 0.53522789478302, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.004, 'eval_steps_per_second': 12.004, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.87s/it]

{'eval_loss': 0.5102000832557678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.8043, 'train_samples_per_second': 4.012, 'train_steps_per_second': 0.501, 'train_loss': 0.6103414477724017, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 29335.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 168.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.6814780831336975, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.282, 'eval_steps_per_second': 12.282, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:20,  1.85s/it]

{'eval_loss': 0.53522789478302, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.645, 'eval_steps_per_second': 11.645, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.87s/it]

{'eval_loss': 0.5102000832557678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.789, 'eval_steps_per_second': 11.789, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.6793, 'train_samples_per_second': 4.02, 'train_steps_per_second': 0.502, 'train_loss': 0.6103414477724017, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 28995.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 168.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.669119119644165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.505, 'eval_steps_per_second': 11.505, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.55s/it]

{'eval_loss': 0.5243489742279053, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.54s/it]

{'eval_loss': 0.4870404601097107, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.763, 'eval_steps_per_second': 11.763, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.5326, 'train_samples_per_second': 3.896, 'train_steps_per_second': 0.525, 'train_loss': 0.5646546681722006, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.65it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 76.54it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 44529.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 169.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5115832090377808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.2, 'eval_steps_per_second': 12.2, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.54s/it]

{'eval_loss': 0.3709207773208618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.961, 'eval_steps_per_second': 11.961, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:06<00:00,  1.54s/it]

{'eval_loss': 0.34911856055259705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.43, 'eval_steps_per_second': 11.43, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.2864, 'train_samples_per_second': 3.91, 'train_steps_per_second': 0.527, 'train_loss': 0.4488968319363064, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 82.56it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 44540.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.26 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 167.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 0.5115832090377808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.376, 'eval_steps_per_second': 13.376, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.53s/it]

{'eval_loss': 0.3709207773208618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.57s/it]

{'eval_loss': 0.34911856055259705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.8013, 'train_samples_per_second': 3.881, 'train_steps_per_second': 0.523, 'train_loss': 0.4488968319363064, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 43396.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 894.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 167.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5115832090377808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:18,  1.57s/it]

{'eval_loss': 0.3709207773208618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.56s/it]

{'eval_loss': 0.34911856055259705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.7662, 'train_samples_per_second': 3.827, 'train_steps_per_second': 0.516, 'train_loss': 0.4488968319363064, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 43532.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 166.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5115832090377808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.079, 'eval_steps_per_second': 12.079, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.52s/it]

{'eval_loss': 0.3709207773208618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.55s/it]

{'eval_loss': 0.34911856055259705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.788, 'eval_steps_per_second': 11.788, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.6286, 'train_samples_per_second': 3.891, 'train_steps_per_second': 0.525, 'train_loss': 0.4488968319363064, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 29985.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 166.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.5115789771080017, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.63s/it]

{'eval_loss': 0.368266761302948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.60s/it]

{'eval_loss': 0.34677404165267944, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.594, 'eval_steps_per_second': 11.594, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8931, 'train_samples_per_second': 3.863, 'train_steps_per_second': 0.515, 'train_loss': 0.447705692715115, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 43705.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 156.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5115789771080017, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.58s/it]

{'eval_loss': 0.368266761302948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.59s/it]

{'eval_loss': 0.34677404165267944, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.84, 'eval_steps_per_second': 12.84, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.92s/it]


{'train_runtime': 68.9855, 'train_samples_per_second': 3.914, 'train_steps_per_second': 0.522, 'train_loss': 0.447705692715115, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.33it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 29980.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.98 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 163.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5115789771080017, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.233, 'eval_steps_per_second': 12.233, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.62s/it]

{'eval_loss': 0.368266761302948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.61s/it]

{'eval_loss': 0.34677404165267944, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.779, 'eval_steps_per_second': 11.779, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.0483, 'train_samples_per_second': 3.854, 'train_steps_per_second': 0.514, 'train_loss': 0.447705692715115, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 29317.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.26 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 165.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5115789771080017, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.422, 'eval_steps_per_second': 12.422, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.59s/it]

{'eval_loss': 0.368266761302948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.854, 'eval_steps_per_second': 11.854, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.59s/it]

{'eval_loss': 0.34677404165267944, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.1649, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.52, 'train_loss': 0.447705692715115, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 44047.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 165.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5115789771080017, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.081, 'eval_steps_per_second': 12.081, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.60s/it]

{'eval_loss': 0.368266761302948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.369, 'eval_steps_per_second': 11.369, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.65s/it]

{'eval_loss': 0.34677404165267944, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.345, 'eval_steps_per_second': 12.345, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.1397, 'train_samples_per_second': 3.849, 'train_steps_per_second': 0.513, 'train_loss': 0.447705692715115, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 30342.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.70 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 160.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5107885003089905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1345, 'eval_samples_per_second': 7.436, 'eval_steps_per_second': 7.436, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.65s/it]

{'eval_loss': 0.3673144280910492, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1122, 'eval_samples_per_second': 8.912, 'eval_steps_per_second': 8.912, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.63s/it]

{'eval_loss': 0.3452746570110321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.1093, 'eval_samples_per_second': 9.147, 'eval_steps_per_second': 9.147, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.6, 'train_samples_per_second': 3.922, 'train_steps_per_second': 0.517, 'train_loss': 0.4470203187730577, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 45525.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 156.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should p

{'eval_loss': 0.5107885003089905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1432, 'eval_samples_per_second': 6.982, 'eval_steps_per_second': 6.982, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.66s/it]

{'eval_loss': 0.3673144280910492, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1213, 'eval_samples_per_second': 8.245, 'eval_steps_per_second': 8.245, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.66s/it]

{'eval_loss': 0.3452746570110321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.1399, 'eval_samples_per_second': 7.15, 'eval_steps_per_second': 7.15, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.4005, 'train_samples_per_second': 3.878, 'train_steps_per_second': 0.511, 'train_loss': 0.4470203187730577, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 45514.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.04 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 160.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5107885003089905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1343, 'eval_samples_per_second': 7.444, 'eval_steps_per_second': 7.444, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.65s/it]

{'eval_loss': 0.3673144280910492, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1284, 'eval_samples_per_second': 7.791, 'eval_steps_per_second': 7.791, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.62s/it]

{'eval_loss': 0.3452746570110321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.999, 'eval_steps_per_second': 10.999, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.5038, 'train_samples_per_second': 3.928, 'train_steps_per_second': 0.518, 'train_loss': 0.4470203187730577, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 45552.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 159.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5107885003089905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1306, 'eval_samples_per_second': 7.656, 'eval_steps_per_second': 7.656, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.64s/it]

{'eval_loss': 0.3673144280910492, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1283, 'eval_samples_per_second': 7.792, 'eval_steps_per_second': 7.792, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.68s/it]

{'eval_loss': 0.3452746570110321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.1285, 'eval_samples_per_second': 7.785, 'eval_steps_per_second': 7.785, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.4091, 'train_samples_per_second': 3.877, 'train_steps_per_second': 0.511, 'train_loss': 0.4470203187730577, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 45503.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 160.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5107885003089905, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.1456, 'eval_samples_per_second': 6.867, 'eval_steps_per_second': 6.867, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.62s/it]

{'eval_loss': 0.3673144280910492, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.047, 'eval_steps_per_second': 12.047, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.62s/it]

{'eval_loss': 0.3452746570110321, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.1288, 'eval_samples_per_second': 7.765, 'eval_steps_per_second': 7.765, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.1281, 'train_samples_per_second': 3.949, 'train_steps_per_second': 0.521, 'train_loss': 0.4470203187730577, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 30666.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 162.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.5106738209724426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.68s/it]

{'eval_loss': 0.36629346013069153, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.378, 'eval_steps_per_second': 12.378, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.70s/it]

{'eval_loss': 0.34426942467689514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.2031, 'train_samples_per_second': 3.931, 'train_steps_per_second': 0.513, 'train_loss': 0.446561336517334, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 36722.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 160.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5106738209724426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.508, 'eval_steps_per_second': 11.508, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.70s/it]

{'eval_loss': 0.36629346013069153, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.68s/it]

{'eval_loss': 0.34426942467689514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.486, 'eval_steps_per_second': 11.486, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.1645, 'train_samples_per_second': 3.934, 'train_steps_per_second': 0.513, 'train_loss': 0.446561336517334, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 45269.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 156.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.5106738209724426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.373, 'eval_steps_per_second': 11.373, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:20,  1.71s/it]

{'eval_loss': 0.36629346013069153, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.079, 'eval_steps_per_second': 12.079, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.73s/it]

{'eval_loss': 0.34426942467689514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.2239, 'train_samples_per_second': 3.875, 'train_steps_per_second': 0.505, 'train_loss': 0.446561336517334, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 45964.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 160.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.5106738209724426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.68s/it]

{'eval_loss': 0.36629346013069153, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.69s/it]

{'eval_loss': 0.34426942467689514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.9478, 'train_samples_per_second': 3.946, 'train_steps_per_second': 0.515, 'train_loss': 0.446561336517334, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 45948.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 160.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.5106738209724426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:20,  1.72s/it]

{'eval_loss': 0.36629346013069153, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.379, 'eval_steps_per_second': 12.379, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.70s/it]

{'eval_loss': 0.34426942467689514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.354, 'eval_steps_per_second': 12.354, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.97s/it]


{'train_runtime': 70.9071, 'train_samples_per_second': 3.892, 'train_steps_per_second': 0.508, 'train_loss': 0.446561336517334, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 46503.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 160.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5102746486663818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.74s/it]

{'eval_loss': 0.36590704321861267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.356, 'eval_steps_per_second': 13.356, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.76s/it]

{'eval_loss': 0.34398362040519714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.6826, 'train_samples_per_second': 3.947, 'train_steps_per_second': 0.509, 'train_loss': 0.4463659922281901, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 165.06it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 31021.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 160.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 462.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5102746486663818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.204, 'eval_steps_per_second': 12.204, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.78s/it]

{'eval_loss': 0.36590704321861267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.004, 'eval_steps_per_second': 13.004, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.76s/it]

{'eval_loss': 0.34398362040519714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.839, 'eval_steps_per_second': 12.839, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.4559, 'train_samples_per_second': 3.905, 'train_steps_per_second': 0.504, 'train_loss': 0.4463659922281901, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 46464.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 944.24 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 160.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 318.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5102746486663818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.921, 'eval_steps_per_second': 11.921, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.75s/it]

{'eval_loss': 0.36590704321861267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.74s/it]

{'eval_loss': 0.34398362040519714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.64, 'eval_steps_per_second': 11.64, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.5477, 'train_samples_per_second': 3.955, 'train_steps_per_second': 0.51, 'train_loss': 0.4463659922281901, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
100%|██████████| 1/1 [00:00<00:00, 165.26it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 46525.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 161.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.5102746486663818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.791, 'eval_steps_per_second': 11.791, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.77s/it]

{'eval_loss': 0.36590704321861267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.78s/it]

{'eval_loss': 0.34398362040519714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.795, 'eval_steps_per_second': 11.795, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.5416, 'train_samples_per_second': 3.9, 'train_steps_per_second': 0.503, 'train_loss': 0.4463659922281901, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 30548.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 160.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5102746486663818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.75s/it]

{'eval_loss': 0.36590704321861267, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.73s/it]

{'eval_loss': 0.34398362040519714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.6715, 'train_samples_per_second': 3.948, 'train_steps_per_second': 0.509, 'train_loss': 0.4463659922281901, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 45978.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 159.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5110209584236145, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.825, 'eval_steps_per_second': 11.825, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.80s/it]

{'eval_loss': 0.3659621477127075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.83s/it]

{'eval_loss': 0.3439088761806488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.927, 'eval_steps_per_second': 12.927, 'epoch': 3.0}


100%|██████████| 36/36 [01:12<00:00,  2.00s/it]


{'train_runtime': 72.0509, 'train_samples_per_second': 3.914, 'train_steps_per_second': 0.5, 'train_loss': 0.4461934831407335, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
100%|██████████| 1/1 [00:00<00:00, 162.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 43988.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.82 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 157.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5110209584236145, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.013, 'eval_steps_per_second': 13.013, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.78s/it]

{'eval_loss': 0.3659621477127075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.128, 'eval_steps_per_second': 12.128, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.81s/it]

{'eval_loss': 0.3439088761806488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.97s/it]


{'train_runtime': 70.9603, 'train_samples_per_second': 3.974, 'train_steps_per_second': 0.507, 'train_loss': 0.4461934831407335, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
100%|██████████| 1/1 [00:00<00:00, 164.10it/s]
100%|██████████| 1/1 [00:00<00:00, 164.32it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 30937.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 158.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 484.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5110209584236145, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.576, 'eval_steps_per_second': 11.576, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.82s/it]

{'eval_loss': 0.3659621477127075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.446, 'eval_steps_per_second': 12.446, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.80s/it]

{'eval_loss': 0.3439088761806488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.778, 'eval_steps_per_second': 11.778, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  2.00s/it]


{'train_runtime': 71.8462, 'train_samples_per_second': 3.925, 'train_steps_per_second': 0.501, 'train_loss': 0.4461934831407335, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.33it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 31310.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 159.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5110209584236145, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.432, 'eval_steps_per_second': 12.432, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.78s/it]

{'eval_loss': 0.3659621477127075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.81s/it]

{'eval_loss': 0.3439088761806488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.51, 'eval_steps_per_second': 12.51, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.1657, 'train_samples_per_second': 3.963, 'train_steps_per_second': 0.506, 'train_loss': 0.4461934831407335, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 47003.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 874.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 160.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5110209584236145, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.169, 'eval_steps_per_second': 11.169, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.80s/it]

{'eval_loss': 0.3659621477127075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0873, 'eval_samples_per_second': 11.449, 'eval_steps_per_second': 11.449, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.81s/it]

{'eval_loss': 0.3439088761806488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.5007, 'train_samples_per_second': 3.944, 'train_steps_per_second': 0.503, 'train_loss': 0.4461934831407335, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 31696.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 943.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 158.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 457.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.510875940322876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.85s/it]

{'eval_loss': 0.3660253882408142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.191, 'eval_steps_per_second': 13.191, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.84s/it]

{'eval_loss': 0.3439323604106903, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.678, 'eval_steps_per_second': 12.678, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.633, 'train_samples_per_second': 3.979, 'train_steps_per_second': 0.503, 'train_loss': 0.44616900549994576, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 31668.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 631.86 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 159.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.510875940322876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.817, 'eval_steps_per_second': 12.817, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:47<00:22,  1.89s/it]

{'eval_loss': 0.3660253882408142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:11<00:00,  1.87s/it]

{'eval_loss': 0.3439323604106903, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 3.0}


100%|██████████| 36/36 [01:13<00:00,  2.03s/it]


{'train_runtime': 73.0023, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.493, 'train_loss': 0.44616900549994576, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 46581.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 158.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.510875940322876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.127, 'eval_steps_per_second': 12.127, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.84s/it]

{'eval_loss': 0.3660253882408142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.751, 'eval_steps_per_second': 10.751, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.86s/it]

{'eval_loss': 0.3439323604106903, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.646, 'eval_steps_per_second': 11.646, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.3912, 'train_samples_per_second': 3.992, 'train_steps_per_second': 0.504, 'train_loss': 0.44616900549994576, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.51it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 47526.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 157.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.510875940322876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.531, 'eval_steps_per_second': 12.531, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.84s/it]

{'eval_loss': 0.3660253882408142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.83s/it]

{'eval_loss': 0.3439323604106903, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.5434, 'train_samples_per_second': 3.984, 'train_steps_per_second': 0.503, 'train_loss': 0.44616900549994576, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 47537.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.22 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 156.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.510875940322876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.81s/it]

{'eval_loss': 0.3660253882408142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.854, 'eval_steps_per_second': 11.854, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.83s/it]

{'eval_loss': 0.3439323604106903, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.766, 'eval_steps_per_second': 11.766, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.97s/it]


{'train_runtime': 70.8428, 'train_samples_per_second': 4.023, 'train_steps_per_second': 0.508, 'train_loss': 0.44616900549994576, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 32007.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 152.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5110331177711487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.122, 'eval_steps_per_second': 11.122, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.86s/it]

{'eval_loss': 0.3660145699977875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.13, 'eval_steps_per_second': 12.13, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.88s/it]

{'eval_loss': 0.3438796401023865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.285, 'eval_steps_per_second': 12.285, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.6371, 'train_samples_per_second': 4.02, 'train_steps_per_second': 0.503, 'train_loss': 0.4461536937289768, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 48014.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 154.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5110331177711487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.89s/it]

{'eval_loss': 0.3660145699977875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.567, 'eval_steps_per_second': 11.567, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.89s/it]

{'eval_loss': 0.3438796401023865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.356, 'eval_steps_per_second': 12.356, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  2.00s/it]


{'train_runtime': 71.9073, 'train_samples_per_second': 4.005, 'train_steps_per_second': 0.501, 'train_loss': 0.4461536937289768, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 31504.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 151.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.5110331177711487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.524, 'eval_steps_per_second': 12.524, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.84s/it]

{'eval_loss': 0.3660145699977875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.87s/it]

{'eval_loss': 0.3438796401023865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.174, 'train_samples_per_second': 4.046, 'train_steps_per_second': 0.506, 'train_loss': 0.4461536937289768, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 48009.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 155.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5110331177711487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.516, 'eval_steps_per_second': 11.516, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.88s/it]

{'eval_loss': 0.3660145699977875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.853, 'eval_steps_per_second': 11.853, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.88s/it]

{'eval_loss': 0.3438796401023865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.866, 'eval_steps_per_second': 10.866, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.4028, 'train_samples_per_second': 4.033, 'train_steps_per_second': 0.504, 'train_loss': 0.4461536937289768, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 32009.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 154.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.5110331177711487, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.88s/it]

{'eval_loss': 0.3660145699977875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.845, 'eval_steps_per_second': 12.845, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.85s/it]

{'eval_loss': 0.3438796401023865, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.35, 'eval_steps_per_second': 12.35, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.4281, 'train_samples_per_second': 4.032, 'train_steps_per_second': 0.504, 'train_loss': 0.4461536937289768, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 31844.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 151.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4841166138648987, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:47<00:19,  1.51s/it]

{'eval_loss': 0.35222187638282776, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.678, 'eval_steps_per_second': 12.678, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:11<00:00,  1.53s/it]

{'eval_loss': 0.32843518257141113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.691, 'eval_steps_per_second': 11.691, 'epoch': 3.0}


100%|██████████| 39/39 [01:12<00:00,  1.87s/it]


{'train_runtime': 72.7948, 'train_samples_per_second': 3.998, 'train_steps_per_second': 0.536, 'train_loss': 0.41259511311848956, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 82.53it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 48486.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.03 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 150.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5748376250267029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.249, 'eval_steps_per_second': 11.249, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:47<00:19,  1.51s/it]

{'eval_loss': 0.45665469765663147, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.244, 'eval_steps_per_second': 11.244, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:12<00:00,  1.53s/it]

{'eval_loss': 0.43542128801345825, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.653, 'eval_steps_per_second': 11.653, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.89s/it]


{'train_runtime': 73.7489, 'train_samples_per_second': 3.946, 'train_steps_per_second': 0.529, 'train_loss': 0.49044139568622297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
100%|██████████| 1/1 [00:00<00:00, 82.87it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 47099.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 153.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5748376250267029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:47<00:20,  1.54s/it]

{'eval_loss': 0.45665469765663147, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:12<00:00,  1.50s/it]

{'eval_loss': 0.43542128801345825, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0747, 'eval_samples_per_second': 13.38, 'eval_steps_per_second': 13.38, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.88s/it]


{'train_runtime': 73.3088, 'train_samples_per_second': 3.97, 'train_steps_per_second': 0.532, 'train_loss': 0.49044139568622297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 47434.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 151.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5748376250267029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0792, 'eval_samples_per_second': 12.627, 'eval_steps_per_second': 12.627, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:48<00:19,  1.53s/it]

{'eval_loss': 0.45665469765663147, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.606, 'eval_steps_per_second': 11.606, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:12<00:00,  1.52s/it]

{'eval_loss': 0.43542128801345825, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.89s/it]


{'train_runtime': 73.6807, 'train_samples_per_second': 3.949, 'train_steps_per_second': 0.529, 'train_loss': 0.49044139568622297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 163.99it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 32330.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 151.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 485.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5748376250267029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:47<00:19,  1.52s/it]

{'eval_loss': 0.45665469765663147, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.769, 'eval_steps_per_second': 11.769, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:11<00:00,  1.51s/it]

{'eval_loss': 0.43542128801345825, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8888888888888888, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.138, 'eval_steps_per_second': 12.138, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.88s/it]


{'train_runtime': 73.2382, 'train_samples_per_second': 3.973, 'train_steps_per_second': 0.533, 'train_loss': 0.49044139568622297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.273090362548828, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2222222222222222, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.302, 'eval_steps_per_second': 11.302, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.50s/it]

{'eval_loss': 2.0506982803344727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6111111111111112, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:04<00:00,  1.75s/it]

{'eval_loss': 1.9494611024856567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7063492063492064, 'eval_span_f1': 0.0, 'eval_runtime': 0.0955, 'eval_samples_per_second': 10.47, 'eval_steps_per_second': 10.47, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.06s/it]


{'train_runtime': 6.1714, 'train_samples_per_second': 0.486, 'train_steps_per_second': 0.486, 'train_loss': 2.0825417836507163, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.18it/s]
100%|██████████| 1/1 [00:00<00:00, 82.39it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be ab

{'eval_loss': 2.1261708736419678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.506, 'eval_steps_per_second': 11.506, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.64s/it]

{'eval_loss': 1.812153935432434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6984126984126984, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.123, 'eval_steps_per_second': 11.123, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.96s/it]

{'eval_loss': 1.660003900527954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0877, 'eval_samples_per_second': 11.397, 'eval_steps_per_second': 11.397, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.26s/it]


{'train_runtime': 6.7785, 'train_samples_per_second': 0.443, 'train_steps_per_second': 0.443, 'train_loss': 1.9410676956176758, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.36it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should p

{'eval_loss': 2.1261708736419678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.483, 'eval_steps_per_second': 12.483, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.53s/it]

{'eval_loss': 1.812153935432434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6984126984126984, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.38, 'eval_steps_per_second': 12.38, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.80s/it]

{'eval_loss': 1.660003900527954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.985, 'eval_steps_per_second': 10.985, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.15s/it]


{'train_runtime': 6.4443, 'train_samples_per_second': 0.466, 'train_steps_per_second': 0.466, 'train_loss': 1.9410676956176758, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.59it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1261708736419678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.61s/it]

{'eval_loss': 1.812153935432434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6984126984126984, 'eval_span_f1': 0.0, 'eval_runtime': 0.0894, 'eval_samples_per_second': 11.184, 'eval_steps_per_second': 11.184, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.87s/it]

{'eval_loss': 1.660003900527954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.519, 'eval_steps_per_second': 11.519, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.16s/it]


{'train_runtime': 6.4852, 'train_samples_per_second': 0.463, 'train_steps_per_second': 0.463, 'train_loss': 1.9410676956176758, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 145.43it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1261708736419678, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.60s/it]

{'eval_loss': 1.812153935432434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6984126984126984, 'eval_span_f1': 0.0, 'eval_runtime': 0.0949, 'eval_samples_per_second': 10.536, 'eval_steps_per_second': 10.536, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.86s/it]

{'eval_loss': 1.660003900527954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.879, 'eval_steps_per_second': 10.879, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.18s/it]


{'train_runtime': 6.5385, 'train_samples_per_second': 0.459, 'train_steps_per_second': 0.459, 'train_loss': 1.9410676956176758, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 2002.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 333.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.9612274169921875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.5873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.449, 'eval_steps_per_second': 12.449, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.70s/it]

{'eval_loss': 1.5917222499847412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0979, 'eval_samples_per_second': 10.216, 'eval_steps_per_second': 10.216, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.99s/it]

{'eval_loss': 1.4246917963027954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0999, 'eval_samples_per_second': 10.011, 'eval_steps_per_second': 10.011, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.31s/it]


{'train_runtime': 6.9266, 'train_samples_per_second': 0.866, 'train_steps_per_second': 0.433, 'train_loss': 1.82470703125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.34it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.92it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 971.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 396.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 1.9612274169921875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.5873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.374, 'eval_steps_per_second': 12.374, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.80s/it]

{'eval_loss': 1.5917222499847412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.974, 'eval_steps_per_second': 11.974, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.02s/it]

{'eval_loss': 1.4246917963027954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.079, 'eval_steps_per_second': 12.079, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.38s/it]


{'train_runtime': 7.1299, 'train_samples_per_second': 0.842, 'train_steps_per_second': 0.421, 'train_loss': 1.82470703125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 999.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 400.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 1.9612274169921875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.5873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.844, 'eval_steps_per_second': 12.844, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.72s/it]

{'eval_loss': 1.5917222499847412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.909, 'eval_steps_per_second': 11.909, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.97s/it]

{'eval_loss': 1.4246917963027954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.884, 'eval_steps_per_second': 10.884, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.33s/it]


{'train_runtime': 6.9776, 'train_samples_per_second': 0.86, 'train_steps_per_second': 0.43, 'train_loss': 1.82470703125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.26it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 2000.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 396.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 222.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 1.9612274169921875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.5873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.531, 'eval_steps_per_second': 12.531, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.72s/it]

{'eval_loss': 1.5917222499847412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.98s/it]

{'eval_loss': 1.4246917963027954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.351, 'eval_steps_per_second': 12.351, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.32s/it]


{'train_runtime': 6.9597, 'train_samples_per_second': 0.862, 'train_steps_per_second': 0.431, 'train_loss': 1.82470703125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.94it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 999.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.66 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 396.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 1.9612274169921875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.5873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.007, 'eval_steps_per_second': 13.007, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.75s/it]

{'eval_loss': 1.5917222499847412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.877, 'eval_steps_per_second': 10.877, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.01s/it]

{'eval_loss': 1.4246917963027954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0965, 'eval_samples_per_second': 10.358, 'eval_steps_per_second': 10.358, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.31s/it]


{'train_runtime': 6.941, 'train_samples_per_second': 0.864, 'train_steps_per_second': 0.432, 'train_loss': 1.82470703125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.29it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 1500.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 2.189781427383423, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.29, 'eval_steps_per_second': 12.29, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.94s/it]

{'eval_loss': 1.9864227771759033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0961, 'eval_samples_per_second': 10.409, 'eval_steps_per_second': 10.409, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.18s/it]

{'eval_loss': 1.860284447669983, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.309, 'eval_steps_per_second': 11.309, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.51s/it]


{'train_runtime': 7.5273, 'train_samples_per_second': 1.196, 'train_steps_per_second': 0.399, 'train_loss': 2.0788278579711914, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.39it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.25it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 1499.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.189781427383423, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.1152, 'eval_samples_per_second': 8.681, 'eval_steps_per_second': 8.681, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.90s/it]

{'eval_loss': 1.9864227771759033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.15s/it]

{'eval_loss': 1.860284447669983, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.375, 'eval_steps_per_second': 11.375, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.48s/it]


{'train_runtime': 7.4446, 'train_samples_per_second': 1.209, 'train_steps_per_second': 0.403, 'train_loss': 2.0788278579711914, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 165.80it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 2868.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 944.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 594.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.189781427383423, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.1173, 'eval_samples_per_second': 8.528, 'eval_steps_per_second': 8.528, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.93s/it]

{'eval_loss': 1.9864227771759033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.631, 'eval_steps_per_second': 11.631, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.17s/it]

{'eval_loss': 1.860284447669983, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.532, 'eval_steps_per_second': 12.532, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.49s/it]


{'train_runtime': 7.4656, 'train_samples_per_second': 1.206, 'train_steps_per_second': 0.402, 'train_loss': 2.0788278579711914, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 150.32it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 181.74it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 1467.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 946.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 496.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.189781427383423, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.91s/it]

{'eval_loss': 1.9864227771759033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.791, 'eval_steps_per_second': 11.791, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.18s/it]

{'eval_loss': 1.860284447669983, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0927, 'eval_samples_per_second': 10.784, 'eval_steps_per_second': 10.784, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.51s/it]


{'train_runtime': 7.5233, 'train_samples_per_second': 1.196, 'train_steps_per_second': 0.399, 'train_loss': 2.0788278579711914, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 3000.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 496.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.189781427383423, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.1236, 'eval_samples_per_second': 8.09, 'eval_steps_per_second': 8.09, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.90s/it]

{'eval_loss': 1.9864227771759033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.16s/it]

{'eval_loss': 1.860284447669983, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0959, 'eval_samples_per_second': 10.429, 'eval_steps_per_second': 10.429, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.47s/it]


{'train_runtime': 7.4236, 'train_samples_per_second': 1.212, 'train_steps_per_second': 0.404, 'train_loss': 2.0788278579711914, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4000.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 568.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.1729230880737305, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.998, 'eval_steps_per_second': 10.998, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.09s/it]

{'eval_loss': 1.9617087841033936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.127, 'eval_steps_per_second': 11.127, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.31s/it]

{'eval_loss': 1.7861324548721313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8095238095238095, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.129, 'eval_steps_per_second': 11.129, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.65s/it]


{'train_runtime': 7.9482, 'train_samples_per_second': 1.51, 'train_steps_per_second': 0.377, 'train_loss': 2.0650046666463218, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.51it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.89it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4001.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 571.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.1729230880737305, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.447, 'eval_steps_per_second': 12.447, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.12s/it]

{'eval_loss': 1.9617087841033936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.1272, 'eval_samples_per_second': 7.862, 'eval_steps_per_second': 7.862, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.34s/it]

{'eval_loss': 1.7861324548721313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8095238095238095, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.751, 'eval_steps_per_second': 10.751, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.66s/it]


{'train_runtime': 7.9676, 'train_samples_per_second': 1.506, 'train_steps_per_second': 0.377, 'train_loss': 2.0650046666463218, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.26it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.17it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4005.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 662.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1729230880737305, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.08s/it]

{'eval_loss': 1.9617087841033936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.515, 'eval_steps_per_second': 11.515, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.34s/it]

{'eval_loss': 1.7861324548721313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8095238095238095, 'eval_span_f1': 0.0, 'eval_runtime': 0.0945, 'eval_samples_per_second': 10.582, 'eval_steps_per_second': 10.582, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.65s/it]


{'train_runtime': 7.9365, 'train_samples_per_second': 1.512, 'train_steps_per_second': 0.378, 'train_loss': 2.0650046666463218, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 3999.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 567.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1729230880737305, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.189, 'eval_steps_per_second': 13.189, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:02,  2.06s/it]

{'eval_loss': 1.9617087841033936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.569, 'eval_steps_per_second': 11.569, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.30s/it]

{'eval_loss': 1.7861324548721313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8095238095238095, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.64, 'eval_steps_per_second': 11.64, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.61s/it]


{'train_runtime': 7.8223, 'train_samples_per_second': 1.534, 'train_steps_per_second': 0.384, 'train_loss': 2.0650046666463218, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4000.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 568.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1729230880737305, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.692, 'eval_steps_per_second': 12.692, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.12s/it]

{'eval_loss': 1.9617087841033936, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0894, 'eval_samples_per_second': 11.185, 'eval_steps_per_second': 11.185, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.36s/it]

{'eval_loss': 1.7861324548721313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8095238095238095, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.652, 'eval_steps_per_second': 11.652, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.68s/it]


{'train_runtime': 8.0488, 'train_samples_per_second': 1.491, 'train_steps_per_second': 0.373, 'train_loss': 2.0650046666463218, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.67it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 109.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.60it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 4807.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 552.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1732277870178223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0897, 'eval_samples_per_second': 11.153, 'eval_steps_per_second': 11.153, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.22s/it]

{'eval_loss': 1.8304892778396606, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.756, 'eval_steps_per_second': 10.756, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.50s/it]

{'eval_loss': 1.722342610359192, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.1081, 'eval_samples_per_second': 9.254, 'eval_steps_per_second': 9.254, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.82s/it]


{'train_runtime': 8.4441, 'train_samples_per_second': 1.776, 'train_steps_per_second': 0.355, 'train_loss': 2.037052949269613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 88.98it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 89.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 5003.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 613.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should p

{'eval_loss': 2.1732277870178223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.904, 'eval_steps_per_second': 11.904, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.23s/it]

{'eval_loss': 1.8304892778396606, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0913, 'eval_samples_per_second': 10.958, 'eval_steps_per_second': 10.958, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.48s/it]

{'eval_loss': 1.722342610359192, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0939, 'eval_samples_per_second': 10.646, 'eval_steps_per_second': 10.646, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.80s/it]


{'train_runtime': 8.391, 'train_samples_per_second': 1.788, 'train_steps_per_second': 0.358, 'train_loss': 2.037052949269613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 4995.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 710.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 489.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.1732277870178223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.198, 'eval_steps_per_second': 13.198, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.23s/it]

{'eval_loss': 1.8304892778396606, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.443, 'eval_steps_per_second': 12.443, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.50s/it]

{'eval_loss': 1.722342610359192, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.82s/it]


{'train_runtime': 8.4519, 'train_samples_per_second': 1.775, 'train_steps_per_second': 0.355, 'train_loss': 2.037052949269613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.97it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 5000.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.54 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 553.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1732277870178223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.009, 'eval_steps_per_second': 13.009, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.26s/it]

{'eval_loss': 1.8304892778396606, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0956, 'eval_samples_per_second': 10.461, 'eval_steps_per_second': 10.461, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.50s/it]

{'eval_loss': 1.722342610359192, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.707, 'eval_steps_per_second': 11.707, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.84s/it]


{'train_runtime': 8.5213, 'train_samples_per_second': 1.76, 'train_steps_per_second': 0.352, 'train_loss': 2.037052949269613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 2500.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 622.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 2.1732277870178223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.2777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.85, 'eval_steps_per_second': 12.85, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.20s/it]

{'eval_loss': 1.8304892778396606, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0979, 'eval_samples_per_second': 10.211, 'eval_steps_per_second': 10.211, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.49s/it]

{'eval_loss': 1.722342610359192, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.184, 'eval_steps_per_second': 12.184, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.79s/it]


{'train_runtime': 8.3696, 'train_samples_per_second': 1.792, 'train_steps_per_second': 0.358, 'train_loss': 2.037052949269613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 2939.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 662.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 235.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.143909454345703, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3968253968253968, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.389, 'eval_steps_per_second': 11.389, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.41s/it]

{'eval_loss': 1.8184616565704346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.1045, 'eval_samples_per_second': 9.566, 'eval_steps_per_second': 9.566, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.67s/it]

{'eval_loss': 1.712701439857483, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0971, 'eval_samples_per_second': 10.3, 'eval_steps_per_second': 10.3, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  3.00s/it]


{'train_runtime': 8.9841, 'train_samples_per_second': 2.004, 'train_steps_per_second': 0.334, 'train_loss': 2.025106906890869, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.98it/s]
100%|██████████| 1/1 [00:00<00:00, 124.08it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 6006.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 663.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.143909454345703, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3968253968253968, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.205, 'eval_steps_per_second': 13.205, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.39s/it]

{'eval_loss': 1.8184616565704346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.64s/it]

{'eval_loss': 1.712701439857483, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.388, 'eval_steps_per_second': 11.388, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.97s/it]


{'train_runtime': 8.9109, 'train_samples_per_second': 2.02, 'train_steps_per_second': 0.337, 'train_loss': 2.025106906890869, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 200.06it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 5999.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 746.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.143909454345703, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3968253968253968, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.697, 'eval_steps_per_second': 12.697, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.41s/it]

{'eval_loss': 1.8184616565704346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.527, 'eval_steps_per_second': 12.527, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.62s/it]

{'eval_loss': 1.712701439857483, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.502, 'eval_steps_per_second': 11.502, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.95s/it]


{'train_runtime': 8.8402, 'train_samples_per_second': 2.036, 'train_steps_per_second': 0.339, 'train_loss': 2.025106906890869, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.35it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 6000.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 594.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.143909454345703, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3968253968253968, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.93, 'eval_steps_per_second': 12.93, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.38s/it]

{'eval_loss': 1.8184616565704346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.617, 'eval_steps_per_second': 11.617, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.64s/it]

{'eval_loss': 1.712701439857483, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.535, 'eval_steps_per_second': 12.535, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.96s/it]


{'train_runtime': 8.8724, 'train_samples_per_second': 2.029, 'train_steps_per_second': 0.338, 'train_loss': 2.025106906890869, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 5997.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.73 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 595.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.143909454345703, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.3968253968253968, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.777, 'eval_steps_per_second': 11.777, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.39s/it]

{'eval_loss': 1.8184616565704346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.379, 'eval_steps_per_second': 11.379, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.66s/it]

{'eval_loss': 1.712701439857483, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.63, 'eval_steps_per_second': 11.63, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.97s/it]


{'train_runtime': 8.8943, 'train_samples_per_second': 2.024, 'train_steps_per_second': 0.337, 'train_loss': 2.025106906890869, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3497.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 696.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.136021614074707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0976, 'eval_samples_per_second': 10.244, 'eval_steps_per_second': 10.244, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.53s/it]

{'eval_loss': 1.8022042512893677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.1031, 'eval_samples_per_second': 9.7, 'eval_steps_per_second': 9.7, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.82s/it]

{'eval_loss': 1.6728492975234985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.104, 'eval_samples_per_second': 9.615, 'eval_steps_per_second': 9.615, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.13s/it]


{'train_runtime': 9.3887, 'train_samples_per_second': 2.237, 'train_steps_per_second': 0.32, 'train_loss': 2.0211691856384277, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 83.08it/s]
100%|██████████| 1/1 [00:00<00:00, 90.44it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3500.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 633.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.136021614074707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.57s/it]

{'eval_loss': 1.8022042512893677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.84s/it]

{'eval_loss': 1.6728492975234985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.996, 'eval_steps_per_second': 10.996, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.15s/it]


{'train_runtime': 9.4615, 'train_samples_per_second': 2.22, 'train_steps_per_second': 0.317, 'train_loss': 2.0211691856384277, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 7000.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 634.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 2.136021614074707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.855, 'eval_steps_per_second': 12.855, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.53s/it]

{'eval_loss': 1.8022042512893677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.632, 'eval_steps_per_second': 11.632, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.80s/it]

{'eval_loss': 1.6728492975234985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.998, 'eval_steps_per_second': 10.998, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.12s/it]


{'train_runtime': 9.3646, 'train_samples_per_second': 2.242, 'train_steps_per_second': 0.32, 'train_loss': 2.0211691856384277, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 6993.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1987.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 634.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.136021614074707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.56s/it]

{'eval_loss': 1.8022042512893677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.117, 'eval_steps_per_second': 11.117, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.84s/it]

{'eval_loss': 1.6728492975234985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0935, 'eval_samples_per_second': 10.699, 'eval_steps_per_second': 10.699, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.18s/it]


{'train_runtime': 9.5335, 'train_samples_per_second': 2.203, 'train_steps_per_second': 0.315, 'train_loss': 2.0211691856384277, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3500.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 533.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.136021614074707, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.60s/it]

{'eval_loss': 1.8022042512893677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.84s/it]

{'eval_loss': 1.6728492975234985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0908, 'eval_samples_per_second': 11.019, 'eval_steps_per_second': 11.019, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.16s/it]


{'train_runtime': 9.4804, 'train_samples_per_second': 2.215, 'train_steps_per_second': 0.316, 'train_loss': 2.0211691856384277, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 7979.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 530.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1287689208984375, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.42063492063492064, 'eval_span_f1': 0.0, 'eval_runtime': 0.0898, 'eval_samples_per_second': 11.133, 'eval_steps_per_second': 11.133, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.75s/it]

{'eval_loss': 1.7948106527328491, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.63, 'eval_steps_per_second': 11.63, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.00s/it]

{'eval_loss': 1.6653401851654053, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.795, 'eval_steps_per_second': 11.795, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.33s/it]


{'train_runtime': 9.9748, 'train_samples_per_second': 2.406, 'train_steps_per_second': 0.301, 'train_loss': 2.019052823384603, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 90.08it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 8008.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 568.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1287689208984375, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.42063492063492064, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.75s/it]

{'eval_loss': 1.7948106527328491, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.02s/it]

{'eval_loss': 1.6653401851654053, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.64, 'eval_steps_per_second': 11.64, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.33s/it]


{'train_runtime': 9.9832, 'train_samples_per_second': 2.404, 'train_steps_per_second': 0.301, 'train_loss': 2.019052823384603, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 4000.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 530.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1287689208984375, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.42063492063492064, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.441, 'eval_steps_per_second': 12.441, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.72s/it]

{'eval_loss': 1.7948106527328491, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0929, 'eval_samples_per_second': 10.77, 'eval_steps_per_second': 10.77, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.99s/it]

{'eval_loss': 1.6653401851654053, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.982, 'eval_steps_per_second': 10.982, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.30s/it]


{'train_runtime': 9.8991, 'train_samples_per_second': 2.424, 'train_steps_per_second': 0.303, 'train_loss': 2.019052823384603, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.96it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 8002.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.85 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 724.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.1287689208984375, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.42063492063492064, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.762, 'eval_steps_per_second': 11.762, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.75s/it]

{'eval_loss': 1.7948106527328491, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0965, 'eval_samples_per_second': 10.361, 'eval_steps_per_second': 10.361, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.01s/it]

{'eval_loss': 1.6653401851654053, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.912, 'eval_steps_per_second': 11.912, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.32s/it]


{'train_runtime': 9.9641, 'train_samples_per_second': 2.409, 'train_steps_per_second': 0.301, 'train_loss': 2.019052823384603, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 7998.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 611.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.1287689208984375, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.42063492063492064, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.209, 'eval_steps_per_second': 13.209, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.73s/it]

{'eval_loss': 1.7948106527328491, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7698412698412699, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.387, 'eval_steps_per_second': 11.387, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.00s/it]

{'eval_loss': 1.6653401851654053, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8333333333333334, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.83, 'eval_steps_per_second': 11.83, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.32s/it]


{'train_runtime': 9.961, 'train_samples_per_second': 2.409, 'train_steps_per_second': 0.301, 'train_loss': 2.019052823384603, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.84it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 9002.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 526.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 1.843542218208313, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.373, 'eval_steps_per_second': 11.373, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.64s/it]

{'eval_loss': 1.3084814548492432, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.43, 'eval_steps_per_second': 11.43, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.71s/it]

{'eval_loss': 1.1274728775024414, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.57, 'eval_steps_per_second': 11.57, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.02s/it]


{'train_runtime': 12.0953, 'train_samples_per_second': 2.232, 'train_steps_per_second': 0.496, 'train_loss': 1.5845478375752766, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 151.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 4499.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 497.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.6314164400100708, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.387, 'eval_steps_per_second': 11.387, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.63s/it]

{'eval_loss': 0.9276310205459595, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.308, 'eval_steps_per_second': 11.308, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.73s/it]

{'eval_loss': 0.8563469648361206, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.91, 'eval_steps_per_second': 11.91, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.05s/it]


{'train_runtime': 12.2928, 'train_samples_per_second': 2.196, 'train_steps_per_second': 0.488, 'train_loss': 1.3205515543619792, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 9007.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.25 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 447.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.6314164400100708, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.078, 'eval_steps_per_second': 12.078, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.64s/it]

{'eval_loss': 0.9276310205459595, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.72s/it]

{'eval_loss': 0.8563469648361206, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.05s/it]


{'train_runtime': 12.2906, 'train_samples_per_second': 2.197, 'train_steps_per_second': 0.488, 'train_loss': 1.3205515543619792, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.10it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 4500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 527.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 1.6314164400100708, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.689, 'eval_steps_per_second': 12.689, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.56s/it]

{'eval_loss': 0.9276310205459595, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.003, 'eval_steps_per_second': 13.003, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.69s/it]

{'eval_loss': 0.8563469648361206, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.851, 'eval_steps_per_second': 12.851, 'epoch': 3.0}


100%|██████████| 6/6 [00:11<00:00,  1.99s/it]


{'train_runtime': 11.9472, 'train_samples_per_second': 2.26, 'train_steps_per_second': 0.502, 'train_loss': 1.3205515543619792, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 8998.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 596.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 1.6314164400100708, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.61s/it]

{'eval_loss': 0.9276310205459595, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.62, 'eval_steps_per_second': 11.62, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.72s/it]

{'eval_loss': 0.8563469648361206, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.04s/it]


{'train_runtime': 12.2175, 'train_samples_per_second': 2.21, 'train_steps_per_second': 0.491, 'train_loss': 1.3205515543619792, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
100%|██████████| 1/1 [00:00<00:00, 164.67it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 9998.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 473.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6291530132293701, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.69s/it]

{'eval_loss': 0.9255768060684204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.778, 'eval_steps_per_second': 11.778, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.79s/it]

{'eval_loss': 0.8627501726150513, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.289, 'eval_steps_per_second': 12.289, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.09s/it]


{'train_runtime': 12.5192, 'train_samples_per_second': 2.396, 'train_steps_per_second': 0.479, 'train_loss': 1.2959855397542317, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 164.43it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 10000.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 472.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6291530132293701, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.287, 'eval_steps_per_second': 12.287, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.73s/it]

{'eval_loss': 0.9255768060684204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.82s/it]

{'eval_loss': 0.8627501726150513, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.949, 'eval_steps_per_second': 11.949, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.11s/it]


{'train_runtime': 12.6803, 'train_samples_per_second': 2.366, 'train_steps_per_second': 0.473, 'train_loss': 1.2959855397542317, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 9998.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 473.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6291530132293701, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.778, 'eval_steps_per_second': 11.778, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.70s/it]

{'eval_loss': 0.9255768060684204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.371, 'eval_steps_per_second': 11.371, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.80s/it]

{'eval_loss': 0.8627501726150513, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.654, 'eval_steps_per_second': 11.654, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.09s/it]


{'train_runtime': 12.5457, 'train_samples_per_second': 2.391, 'train_steps_per_second': 0.478, 'train_loss': 1.2959855397542317, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 9666.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 521.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6291530132293701, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.72s/it]

{'eval_loss': 0.9255768060684204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0969, 'eval_samples_per_second': 10.322, 'eval_steps_per_second': 10.322, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.81s/it]

{'eval_loss': 0.8627501726150513, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.12s/it]


{'train_runtime': 12.7197, 'train_samples_per_second': 2.359, 'train_steps_per_second': 0.472, 'train_loss': 1.2959855397542317, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 4914.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 496.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6291530132293701, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.143, 'eval_steps_per_second': 12.143, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.69s/it]

{'eval_loss': 0.9255768060684204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.80s/it]

{'eval_loss': 0.8627501726150513, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.10s/it]


{'train_runtime': 12.583, 'train_samples_per_second': 2.384, 'train_steps_per_second': 0.477, 'train_loss': 1.2959855397542317, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 162.31it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 7182.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 456.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6070541143417358, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.1385, 'eval_samples_per_second': 7.219, 'eval_steps_per_second': 7.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.82s/it]

{'eval_loss': 1.2559070587158203, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.514, 'eval_steps_per_second': 12.514, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.92s/it]

{'eval_loss': 1.1529425382614136, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1334, 'eval_samples_per_second': 7.496, 'eval_steps_per_second': 7.496, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.20s/it]


{'train_runtime': 13.2073, 'train_samples_per_second': 2.499, 'train_steps_per_second': 0.454, 'train_loss': 1.3109699885050456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 10497.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 456.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6070541143417358, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 11.006, 'eval_steps_per_second': 11.006, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.79s/it]

{'eval_loss': 1.2559070587158203, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.433, 'eval_steps_per_second': 11.433, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.88s/it]

{'eval_loss': 1.1529425382614136, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.517, 'eval_steps_per_second': 12.517, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.17s/it]


{'train_runtime': 13.0011, 'train_samples_per_second': 2.538, 'train_steps_per_second': 0.461, 'train_loss': 1.3109699885050456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 98.70it/s]
100%|██████████| 1/1 [00:00<00:00, 165.06it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 5504.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 437.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6070541143417358, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.76, 'eval_steps_per_second': 11.76, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.84s/it]

{'eval_loss': 1.2559070587158203, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1399, 'eval_samples_per_second': 7.146, 'eval_steps_per_second': 7.146, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.93s/it]

{'eval_loss': 1.1529425382614136, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1082, 'eval_samples_per_second': 9.243, 'eval_steps_per_second': 9.243, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.21s/it]


{'train_runtime': 13.2667, 'train_samples_per_second': 2.487, 'train_steps_per_second': 0.452, 'train_loss': 1.3109699885050456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 165.15it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 11013.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 437.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 307.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6070541143417358, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.288, 'eval_steps_per_second': 11.288, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.80s/it]

{'eval_loss': 1.2559070587158203, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.899, 'eval_steps_per_second': 11.899, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.90s/it]

{'eval_loss': 1.1529425382614136, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.896, 'eval_steps_per_second': 11.896, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.19s/it]


{'train_runtime': 13.1133, 'train_samples_per_second': 2.517, 'train_steps_per_second': 0.458, 'train_loss': 1.3109699885050456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 165.47it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 11003.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 456.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.6070541143417358, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.773, 'eval_steps_per_second': 11.773, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.84s/it]

{'eval_loss': 1.2559070587158203, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1333, 'eval_samples_per_second': 7.499, 'eval_steps_per_second': 7.499, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.94s/it]

{'eval_loss': 1.1529425382614136, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1374, 'eval_samples_per_second': 7.277, 'eval_steps_per_second': 7.277, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.22s/it]


{'train_runtime': 13.3258, 'train_samples_per_second': 2.476, 'train_steps_per_second': 0.45, 'train_loss': 1.3109699885050456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 200.07it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 12000.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 459.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 484.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.6116104125976562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.90s/it]

{'eval_loss': 0.945729672908783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.689, 'eval_steps_per_second': 12.689, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.01s/it]

{'eval_loss': 0.8931344747543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.381, 'eval_steps_per_second': 12.381, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


{'train_runtime': 13.6273, 'train_samples_per_second': 2.642, 'train_steps_per_second': 0.44, 'train_loss': 1.3070155779520671, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 6000.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 426.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 491.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6116104125976562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.757, 'eval_steps_per_second': 10.757, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.92s/it]

{'eval_loss': 0.945729672908783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.975, 'eval_steps_per_second': 10.975, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.04s/it]

{'eval_loss': 0.8931344747543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.30s/it]


{'train_runtime': 13.7983, 'train_samples_per_second': 2.609, 'train_steps_per_second': 0.435, 'train_loss': 1.3070155779520671, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 11998.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 373.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6116104125976562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.88s/it]

{'eval_loss': 0.945729672908783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.99s/it]

{'eval_loss': 0.8931344747543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.08, 'eval_steps_per_second': 12.08, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.25s/it]


{'train_runtime': 13.5246, 'train_samples_per_second': 2.662, 'train_steps_per_second': 0.444, 'train_loss': 1.3070155779520671, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 5730.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 484.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 477.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6116104125976562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.978, 'eval_steps_per_second': 11.978, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.92s/it]

{'eval_loss': 0.945729672908783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.934, 'eval_steps_per_second': 11.934, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.01s/it]

{'eval_loss': 0.8931344747543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.513, 'eval_steps_per_second': 11.513, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.27s/it]


{'train_runtime': 13.6464, 'train_samples_per_second': 2.638, 'train_steps_per_second': 0.44, 'train_loss': 1.3070155779520671, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 12000.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 417.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6116104125976562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8412698412698413, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 11.648, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.86s/it]

{'eval_loss': 0.945729672908783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0946, 'eval_samples_per_second': 10.573, 'eval_steps_per_second': 10.573, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.97s/it]

{'eval_loss': 0.8931344747543335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.377, 'eval_steps_per_second': 12.377, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.23s/it]


{'train_runtime': 13.405, 'train_samples_per_second': 2.686, 'train_steps_per_second': 0.448, 'train_loss': 1.3070155779520671, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 13010.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 349.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5943126678466797, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.274, 'eval_steps_per_second': 12.274, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:03,  2.00s/it]

{'eval_loss': 0.9841263294219971, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0929, 'eval_samples_per_second': 10.763, 'eval_steps_per_second': 10.763, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.11s/it]

{'eval_loss': 0.9397985339164734, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.945, 'eval_steps_per_second': 12.945, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.36s/it]


{'train_runtime': 14.1661, 'train_samples_per_second': 2.753, 'train_steps_per_second': 0.424, 'train_loss': 1.281215985616048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6496.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 340.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5943126678466797, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.378, 'eval_steps_per_second': 11.378, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:03,  1.99s/it]

{'eval_loss': 0.9841263294219971, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.564, 'eval_steps_per_second': 11.564, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.11s/it]

{'eval_loss': 0.9397985339164734, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.36s/it]


{'train_runtime': 14.1705, 'train_samples_per_second': 2.752, 'train_steps_per_second': 0.423, 'train_loss': 1.281215985616048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
100%|██████████| 1/1 [00:00<00:00, 165.26it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 12985.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.70 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 353.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5943126678466797, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.078, 'eval_steps_per_second': 12.078, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.00s/it]

{'eval_loss': 0.9841263294219971, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.207, 'eval_steps_per_second': 12.207, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.10s/it]

{'eval_loss': 0.9397985339164734, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.37s/it]


{'train_runtime': 14.1893, 'train_samples_per_second': 2.749, 'train_steps_per_second': 0.423, 'train_loss': 1.281215985616048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6505.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 340.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5943126678466797, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.00s/it]

{'eval_loss': 0.9841263294219971, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.374, 'eval_steps_per_second': 12.374, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.07s/it]

{'eval_loss': 0.9397985339164734, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.007, 'eval_steps_per_second': 13.007, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.34s/it]


{'train_runtime': 14.0607, 'train_samples_per_second': 2.774, 'train_steps_per_second': 0.427, 'train_loss': 1.281215985616048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6392.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 331.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5943126678466797, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.679, 'eval_steps_per_second': 12.679, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.01s/it]

{'eval_loss': 0.9841263294219971, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.133, 'eval_steps_per_second': 12.133, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.08s/it]

{'eval_loss': 0.9397985339164734, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.051, 'eval_steps_per_second': 12.051, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.35s/it]


{'train_runtime': 14.09, 'train_samples_per_second': 2.768, 'train_steps_per_second': 0.426, 'train_loss': 1.281215985616048, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 164.23it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 14001.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 348.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.6040107011795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.08s/it]

{'eval_loss': 0.9283996224403381, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.18s/it]

{'eval_loss': 0.8814278841018677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.291, 'eval_steps_per_second': 12.291, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.43s/it]


{'train_runtime': 14.5781, 'train_samples_per_second': 2.881, 'train_steps_per_second': 0.412, 'train_loss': 1.2899072964986165, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 162.82it/s]
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 13471.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 323.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 260.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6040107011795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.109, 'eval_steps_per_second': 11.109, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.10s/it]

{'eval_loss': 0.9283996224403381, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.18s/it]

{'eval_loss': 0.8814278841018677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.682, 'eval_steps_per_second': 12.682, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.44s/it]


{'train_runtime': 14.6425, 'train_samples_per_second': 2.868, 'train_steps_per_second': 0.41, 'train_loss': 1.2899072964986165, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 13997.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 339.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.6040107011795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.204, 'eval_steps_per_second': 12.204, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.09s/it]

{'eval_loss': 0.9283996224403381, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.204, 'eval_steps_per_second': 13.204, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.17s/it]

{'eval_loss': 0.8814278841018677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.352, 'eval_steps_per_second': 12.352, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.42s/it]


{'train_runtime': 14.5104, 'train_samples_per_second': 2.894, 'train_steps_per_second': 0.413, 'train_loss': 1.2899072964986165, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.84it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 13987.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 366.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6040107011795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.09s/it]

{'eval_loss': 0.9283996224403381, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.19s/it]

{'eval_loss': 0.8814278841018677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0756, 'eval_samples_per_second': 13.227, 'eval_steps_per_second': 13.227, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.43s/it]


{'train_runtime': 14.5718, 'train_samples_per_second': 2.882, 'train_steps_per_second': 0.412, 'train_loss': 1.2899072964986165, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 6883.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 331.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 395.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6040107011795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.10s/it]

{'eval_loss': 0.9283996224403381, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 13.195, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.18s/it]

{'eval_loss': 0.8814278841018677, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.024, 'eval_steps_per_second': 13.024, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.44s/it]


{'train_runtime': 14.622, 'train_samples_per_second': 2.872, 'train_steps_per_second': 0.41, 'train_loss': 1.2899072964986165, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 14983.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.95 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 330.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.606673002243042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0913, 'eval_samples_per_second': 10.95, 'eval_steps_per_second': 10.95, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.21s/it]

{'eval_loss': 0.9253917336463928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.29s/it]

{'eval_loss': 0.8750457167625427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.26, 'eval_steps_per_second': 12.26, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.53s/it]


{'train_runtime': 15.1778, 'train_samples_per_second': 2.965, 'train_steps_per_second': 0.395, 'train_loss': 1.2930413881937664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 15001.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.45 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 316.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.606673002243042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.1241, 'eval_samples_per_second': 8.059, 'eval_steps_per_second': 8.059, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.22s/it]

{'eval_loss': 0.9253917336463928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.377, 'eval_steps_per_second': 12.377, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.28s/it]

{'eval_loss': 0.8750457167625427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.53s/it]


{'train_runtime': 15.1896, 'train_samples_per_second': 2.963, 'train_steps_per_second': 0.395, 'train_loss': 1.2930413881937664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 7500.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 347.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.606673002243042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.305, 'eval_steps_per_second': 12.305, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.17s/it]

{'eval_loss': 0.9253917336463928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.983, 'eval_steps_per_second': 11.983, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.29s/it]

{'eval_loss': 0.8750457167625427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.036, 'eval_steps_per_second': 12.036, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.52s/it]


{'train_runtime': 15.0891, 'train_samples_per_second': 2.982, 'train_steps_per_second': 0.398, 'train_loss': 1.2930413881937664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.37it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 14976.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 346.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.606673002243042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.595, 'eval_steps_per_second': 11.595, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.18s/it]

{'eval_loss': 0.9253917336463928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.921, 'eval_steps_per_second': 11.921, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.29s/it]

{'eval_loss': 0.8750457167625427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.286, 'eval_steps_per_second': 12.286, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.52s/it]


{'train_runtime': 15.1457, 'train_samples_per_second': 2.971, 'train_steps_per_second': 0.396, 'train_loss': 1.2930413881937664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 15026.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.96 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 324.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.606673002243042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.133, 'eval_samples_per_second': 7.517, 'eval_steps_per_second': 7.517, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.19s/it]

{'eval_loss': 0.9253917336463928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.28s/it]

{'eval_loss': 0.8750457167625427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.52s/it]


{'train_runtime': 15.118, 'train_samples_per_second': 2.977, 'train_steps_per_second': 0.397, 'train_loss': 1.2930413881937664, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
100%|██████████| 1/1 [00:00<00:00, 164.96it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 15997.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.43 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 370.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.6083663702011108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.379, 'eval_steps_per_second': 11.379, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.30s/it]

{'eval_loss': 0.9258723855018616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.846, 'eval_steps_per_second': 12.846, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.41s/it]

{'eval_loss': 0.8529059886932373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.518, 'eval_steps_per_second': 11.518, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.63s/it]


{'train_runtime': 15.7635, 'train_samples_per_second': 3.045, 'train_steps_per_second': 0.381, 'train_loss': 1.2937699953715007, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 16004.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 966.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 346.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.6083663702011108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.447, 'eval_steps_per_second': 12.447, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.30s/it]

{'eval_loss': 0.9258723855018616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.40s/it]

{'eval_loss': 0.8529059886932373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.62s/it]


{'train_runtime': 15.7418, 'train_samples_per_second': 3.049, 'train_steps_per_second': 0.381, 'train_loss': 1.2937699953715007, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 8008.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 970.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 353.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.6083663702011108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.301, 'eval_steps_per_second': 12.301, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.27s/it]

{'eval_loss': 0.9258723855018616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.79, 'eval_steps_per_second': 11.79, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.39s/it]

{'eval_loss': 0.8529059886932373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.60s/it]


{'train_runtime': 15.5921, 'train_samples_per_second': 3.078, 'train_steps_per_second': 0.385, 'train_loss': 1.2937699953715007, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 15985.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 331.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6083663702011108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.209, 'eval_steps_per_second': 12.209, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.28s/it]

{'eval_loss': 0.9258723855018616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.39s/it]

{'eval_loss': 0.8529059886932373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.25, 'eval_steps_per_second': 11.25, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.61s/it]


{'train_runtime': 15.6552, 'train_samples_per_second': 3.066, 'train_steps_per_second': 0.383, 'train_loss': 1.2937699953715007, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 200.06it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 16024.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.39 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 312.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.6083663702011108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8571428571428571, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.797, 'eval_steps_per_second': 11.797, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.27s/it]

{'eval_loss': 0.9258723855018616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.708, 'eval_steps_per_second': 11.708, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.38s/it]

{'eval_loss': 0.8529059886932373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.59s/it]


{'train_runtime': 15.5588, 'train_samples_per_second': 3.085, 'train_steps_per_second': 0.386, 'train_loss': 1.2937699953715007, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 16395.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 319.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.3901777267456055, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.313, 'eval_steps_per_second': 11.313, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.58s/it]

{'eval_loss': 1.079133152961731, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.286, 'eval_steps_per_second': 11.286, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 1.033545970916748, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.96s/it]


{'train_runtime': 17.6089, 'train_samples_per_second': 2.896, 'train_steps_per_second': 0.511, 'train_loss': 1.13758118947347, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8503.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 318.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.8175948858261108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.509, 'eval_steps_per_second': 11.509, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.61s/it]

{'eval_loss': 1.206871509552002, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 1.043592929840088, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.99s/it]


{'train_runtime': 17.9539, 'train_samples_per_second': 2.841, 'train_steps_per_second': 0.501, 'train_loss': 1.4153001573350694, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 17021.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 328.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 274.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8175948858261108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.61s/it]

{'eval_loss': 1.206871509552002, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.076, 'eval_steps_per_second': 12.076, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 1.043592929840088, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.057, 'eval_steps_per_second': 12.057, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.99s/it]


{'train_runtime': 17.8893, 'train_samples_per_second': 2.851, 'train_steps_per_second': 0.503, 'train_loss': 1.4153001573350694, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
100%|██████████| 1/1 [00:00<00:00, 165.19it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 17001.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 313.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 1.8175948858261108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.376, 'eval_steps_per_second': 11.376, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.62s/it]

{'eval_loss': 1.206871509552002, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.64s/it]

{'eval_loss': 1.043592929840088, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.019, 'eval_steps_per_second': 13.019, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.97s/it]


{'train_runtime': 17.6893, 'train_samples_per_second': 2.883, 'train_steps_per_second': 0.509, 'train_loss': 1.4153001573350694, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8501.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.42 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 331.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8175948858261108, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.268, 'eval_steps_per_second': 12.268, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.58s/it]

{'eval_loss': 1.206871509552002, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.149, 'eval_steps_per_second': 12.149, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 1.043592929840088, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.353, 'eval_steps_per_second': 12.353, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.96s/it]


{'train_runtime': 17.6308, 'train_samples_per_second': 2.893, 'train_steps_per_second': 0.51, 'train_loss': 1.4153001573350694, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8819.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 309.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8119639158248901, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0898, 'eval_samples_per_second': 11.133, 'eval_steps_per_second': 11.133, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.66s/it]

{'eval_loss': 1.2009706497192383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.515, 'eval_steps_per_second': 11.515, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.72s/it]

{'eval_loss': 1.0361628532409668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.03s/it]


{'train_runtime': 18.2374, 'train_samples_per_second': 2.961, 'train_steps_per_second': 0.493, 'train_loss': 1.4079142676459417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 17205.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 319.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8119639158248901, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:05,  1.69s/it]

{'eval_loss': 1.2009706497192383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.76s/it]

{'eval_loss': 1.0361628532409668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.004, 'eval_steps_per_second': 12.004, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.05s/it]


{'train_runtime': 18.4715, 'train_samples_per_second': 2.923, 'train_steps_per_second': 0.487, 'train_loss': 1.4079142676459417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.17it/s]
100%|██████████| 1/1 [00:00<00:00, 163.62it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8841.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 344.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.8119639158248901, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.905, 'eval_steps_per_second': 11.905, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.65s/it]

{'eval_loss': 1.2009706497192383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.71s/it]

{'eval_loss': 1.0361628532409668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.01s/it]


{'train_runtime': 18.0798, 'train_samples_per_second': 2.987, 'train_steps_per_second': 0.498, 'train_loss': 1.4079142676459417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
100%|██████████| 1/1 [00:00<00:00, 162.42it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 9004.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 314.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8119639158248901, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.626, 'eval_steps_per_second': 11.626, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.70s/it]

{'eval_loss': 1.2009706497192383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.192, 'eval_steps_per_second': 13.192, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.74s/it]

{'eval_loss': 1.0361628532409668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.981, 'eval_steps_per_second': 11.981, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.05s/it]


{'train_runtime': 18.4119, 'train_samples_per_second': 2.933, 'train_steps_per_second': 0.489, 'train_loss': 1.4079142676459417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 17997.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.62 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 351.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8119639158248901, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:05,  1.67s/it]

{'eval_loss': 1.2009706497192383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.74s/it]

{'eval_loss': 1.0361628532409668, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.04s/it]


{'train_runtime': 18.3913, 'train_samples_per_second': 2.936, 'train_steps_per_second': 0.489, 'train_loss': 1.4079142676459417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 18996.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.10 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 350.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8098591566085815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.093, 'eval_steps_per_second': 11.093, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.76s/it]

{'eval_loss': 1.190145492553711, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1334, 'eval_samples_per_second': 7.495, 'eval_steps_per_second': 7.495, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.82s/it]

{'eval_loss': 1.029915690422058, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1324, 'eval_samples_per_second': 7.553, 'eval_steps_per_second': 7.553, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.10s/it]


{'train_runtime': 18.8922, 'train_samples_per_second': 3.017, 'train_steps_per_second': 0.476, 'train_loss': 1.4039350085788302, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.89it/s]
100%|██████████| 1/1 [00:00<00:00, 82.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 18992.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 486.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 331.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.8098591566085815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0994, 'eval_samples_per_second': 10.058, 'eval_steps_per_second': 10.058, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.74s/it]

{'eval_loss': 1.190145492553711, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1385, 'eval_samples_per_second': 7.222, 'eval_steps_per_second': 7.222, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.78s/it]

{'eval_loss': 1.029915690422058, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.06s/it]


{'train_runtime': 18.5721, 'train_samples_per_second': 3.069, 'train_steps_per_second': 0.485, 'train_loss': 1.4039350085788302, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.31it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 19005.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 337.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8098591566085815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.76s/it]

{'eval_loss': 1.190145492553711, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1365, 'eval_samples_per_second': 7.324, 'eval_steps_per_second': 7.324, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.83s/it]

{'eval_loss': 1.029915690422058, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1405, 'eval_samples_per_second': 7.118, 'eval_steps_per_second': 7.118, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.11s/it]


{'train_runtime': 18.9808, 'train_samples_per_second': 3.003, 'train_steps_per_second': 0.474, 'train_loss': 1.4039350085788302, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 9495.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.02 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 337.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8098591566085815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.1618, 'eval_samples_per_second': 6.182, 'eval_steps_per_second': 6.182, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.74s/it]

{'eval_loss': 1.190145492553711, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0932, 'eval_samples_per_second': 10.725, 'eval_steps_per_second': 10.725, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.79s/it]

{'eval_loss': 1.029915690422058, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.523, 'eval_steps_per_second': 12.523, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.09s/it]


{'train_runtime': 18.8244, 'train_samples_per_second': 3.028, 'train_steps_per_second': 0.478, 'train_loss': 1.4039350085788302, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 19001.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.73 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 356.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8098591566085815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.1312, 'eval_samples_per_second': 7.62, 'eval_steps_per_second': 7.62, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.76s/it]

{'eval_loss': 1.190145492553711, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.496, 'eval_steps_per_second': 11.496, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.81s/it]

{'eval_loss': 1.029915690422058, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1314, 'eval_samples_per_second': 7.612, 'eval_steps_per_second': 7.612, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.10s/it]


{'train_runtime': 18.9247, 'train_samples_per_second': 3.012, 'train_steps_per_second': 0.476, 'train_loss': 1.4039350085788302, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20015.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 300.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 318.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8084653615951538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0801, 'eval_samples_per_second': 12.485, 'eval_steps_per_second': 12.485, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.82s/it]

{'eval_loss': 1.1796751022338867, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.791, 'eval_steps_per_second': 11.791, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.87s/it]

{'eval_loss': 1.0238019227981567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.772, 'eval_steps_per_second': 11.772, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.13s/it]


{'train_runtime': 19.1619, 'train_samples_per_second': 3.131, 'train_steps_per_second': 0.47, 'train_loss': 1.4043960571289062, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
100%|██████████| 1/1 [00:00<00:00, 124.02it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 19987.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.25 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 337.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 1.8084653615951538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.132, 'eval_steps_per_second': 12.132, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.84s/it]

{'eval_loss': 1.1796751022338867, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.914, 'eval_steps_per_second': 11.914, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.90s/it]

{'eval_loss': 1.0238019227981567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.16s/it]


{'train_runtime': 19.4216, 'train_samples_per_second': 3.089, 'train_steps_per_second': 0.463, 'train_loss': 1.4043960571289062, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 19977.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.10 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 326.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8084653615951538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.047, 'eval_steps_per_second': 12.047, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.81s/it]

{'eval_loss': 1.1796751022338867, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.636, 'eval_steps_per_second': 11.636, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.87s/it]

{'eval_loss': 1.0238019227981567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.851, 'eval_steps_per_second': 11.851, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.13s/it]


{'train_runtime': 19.1679, 'train_samples_per_second': 3.13, 'train_steps_per_second': 0.47, 'train_loss': 1.4043960571289062, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 10000.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 343.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8084653615951538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.85s/it]

{'eval_loss': 1.1796751022338867, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.371, 'eval_steps_per_second': 13.371, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.90s/it]

{'eval_loss': 1.0238019227981567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.18, 'eval_steps_per_second': 13.18, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.16s/it]


{'train_runtime': 19.4199, 'train_samples_per_second': 3.09, 'train_steps_per_second': 0.463, 'train_loss': 1.4043960571289062, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.85it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20006.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 326.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 483.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8084653615951538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7619047619047619, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.698, 'eval_steps_per_second': 11.698, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.82s/it]

{'eval_loss': 1.1796751022338867, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.492, 'eval_steps_per_second': 11.492, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.88s/it]

{'eval_loss': 1.0238019227981567, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.14s/it]


{'train_runtime': 19.2214, 'train_samples_per_second': 3.122, 'train_steps_per_second': 0.468, 'train_loss': 1.4043960571289062, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20996.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 316.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8093379735946655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.001, 'eval_steps_per_second': 13.001, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.93s/it]

{'eval_loss': 1.1842554807662964, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.318, 'eval_steps_per_second': 12.318, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.00s/it]

{'eval_loss': 1.0072641372680664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.22s/it]


{'train_runtime': 19.971, 'train_samples_per_second': 3.155, 'train_steps_per_second': 0.451, 'train_loss': 1.4032841788397894, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 141.92it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 21001.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.66 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 278.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8093379735946655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.518, 'eval_steps_per_second': 11.518, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.90s/it]

{'eval_loss': 1.1842554807662964, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.95s/it]

{'eval_loss': 1.0072641372680664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.444, 'eval_steps_per_second': 12.444, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.20s/it]


{'train_runtime': 19.7547, 'train_samples_per_second': 3.189, 'train_steps_per_second': 0.456, 'train_loss': 1.4032841788397894, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10500.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 274.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8093379735946655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.633, 'eval_steps_per_second': 11.633, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.89s/it]

{'eval_loss': 1.1842554807662964, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.687, 'eval_steps_per_second': 12.687, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.97s/it]

{'eval_loss': 1.0072641372680664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.21s/it]


{'train_runtime': 19.9275, 'train_samples_per_second': 3.161, 'train_steps_per_second': 0.452, 'train_loss': 1.4032841788397894, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10304.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 298.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8093379735946655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.89s/it]

{'eval_loss': 1.1842554807662964, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.95s/it]

{'eval_loss': 1.0072641372680664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.20s/it]


{'train_runtime': 19.7608, 'train_samples_per_second': 3.188, 'train_steps_per_second': 0.455, 'train_loss': 1.4032841788397894, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10499.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.99 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 298.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8093379735946655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.937, 'eval_steps_per_second': 11.937, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.89s/it]

{'eval_loss': 1.1842554807662964, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.703, 'eval_steps_per_second': 11.703, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.95s/it]

{'eval_loss': 1.0072641372680664, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.906, 'eval_steps_per_second': 11.906, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.20s/it]


{'train_runtime': 19.8117, 'train_samples_per_second': 3.18, 'train_steps_per_second': 0.454, 'train_loss': 1.4032841788397894, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 10750.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 295.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8093219995498657, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.96s/it]

{'eval_loss': 1.1879950761795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.04s/it]

{'eval_loss': 1.0283644199371338, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2546, 'train_samples_per_second': 3.259, 'train_steps_per_second': 0.444, 'train_loss': 1.4011264377170138, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 21996.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 263.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8093219995498657, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.96s/it]

{'eval_loss': 1.1879950761795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.02s/it]

{'eval_loss': 1.0283644199371338, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.199, 'eval_steps_per_second': 12.199, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.23s/it]


{'train_runtime': 20.0963, 'train_samples_per_second': 3.284, 'train_steps_per_second': 0.448, 'train_loss': 1.4011264377170138, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 10788.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 288.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8093219995498657, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.354, 'eval_steps_per_second': 13.354, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.98s/it]

{'eval_loss': 1.1879950761795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.991, 'eval_steps_per_second': 10.991, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.02s/it]

{'eval_loss': 1.0283644199371338, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.057, 'eval_steps_per_second': 12.057, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2814, 'train_samples_per_second': 3.254, 'train_steps_per_second': 0.444, 'train_loss': 1.4011264377170138, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 11000.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 304.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8093219995498657, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.374, 'eval_steps_per_second': 12.374, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.97s/it]

{'eval_loss': 1.1879950761795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.01s/it]

{'eval_loss': 1.0283644199371338, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.19, 'eval_steps_per_second': 13.19, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.23s/it]


{'train_runtime': 20.1081, 'train_samples_per_second': 3.282, 'train_steps_per_second': 0.448, 'train_loss': 1.4011264377170138, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 165.19it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 22001.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 295.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.8093219995498657, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.96s/it]

{'eval_loss': 1.1879950761795044, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.788, 'eval_steps_per_second': 11.788, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.06s/it]

{'eval_loss': 1.0283644199371338, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.004, 'eval_steps_per_second': 13.004, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.27s/it]


{'train_runtime': 20.4268, 'train_samples_per_second': 3.231, 'train_steps_per_second': 0.441, 'train_loss': 1.4011264377170138, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11500.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 275.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8099994659423828, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.511, 'eval_steps_per_second': 11.511, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.06s/it]

{'eval_loss': 1.1923273801803589, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.08s/it]

{'eval_loss': 1.0316375494003296, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.29s/it]


{'train_runtime': 20.6487, 'train_samples_per_second': 3.342, 'train_steps_per_second': 0.436, 'train_loss': 1.4000723097059462, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11498.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.17 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 298.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8099994659423828, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.98, 'eval_steps_per_second': 11.98, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.07s/it]

{'eval_loss': 1.1923273801803589, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.12s/it]

{'eval_loss': 1.0316375494003296, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.241, 'eval_steps_per_second': 11.241, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.32s/it]


{'train_runtime': 20.8741, 'train_samples_per_second': 3.306, 'train_steps_per_second': 0.431, 'train_loss': 1.4000723097059462, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11500.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.89 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 275.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8099994659423828, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.047, 'eval_steps_per_second': 12.047, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.03s/it]

{'eval_loss': 1.1923273801803589, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.10s/it]

{'eval_loss': 1.0316375494003296, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.003, 'eval_steps_per_second': 13.003, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.29s/it]


{'train_runtime': 20.5921, 'train_samples_per_second': 3.351, 'train_steps_per_second': 0.437, 'train_loss': 1.4000723097059462, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.67it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11499.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 313.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8099994659423828, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.05s/it]

{'eval_loss': 1.1923273801803589, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0897, 'eval_samples_per_second': 11.151, 'eval_steps_per_second': 11.151, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.10s/it]

{'eval_loss': 1.0316375494003296, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.006, 'eval_steps_per_second': 13.006, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.30s/it]


{'train_runtime': 20.7192, 'train_samples_per_second': 3.33, 'train_steps_per_second': 0.434, 'train_loss': 1.4000723097059462, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.22it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11500.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 275.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8099994659423828, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.851, 'eval_steps_per_second': 12.851, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.01s/it]

{'eval_loss': 1.1923273801803589, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.444, 'eval_steps_per_second': 11.444, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.07s/it]

{'eval_loss': 1.0316375494003296, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.28s/it]


{'train_runtime': 20.4996, 'train_samples_per_second': 3.366, 'train_steps_per_second': 0.439, 'train_loss': 1.4000723097059462, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 165.06it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 12003.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 967.10 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 279.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8129112720489502, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.383, 'eval_steps_per_second': 12.383, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.11s/it]

{'eval_loss': 1.190026879310608, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.013, 'eval_steps_per_second': 13.013, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.18s/it]

{'eval_loss': 1.0246325731277466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.68, 'eval_steps_per_second': 12.68, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.36s/it]


{'train_runtime': 21.2086, 'train_samples_per_second': 3.395, 'train_steps_per_second': 0.424, 'train_loss': 1.3989398744371202, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
100%|██████████| 1/1 [00:00<00:00, 164.17it/s]
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 23996.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 280.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8129112720489502, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.15s/it]

{'eval_loss': 1.190026879310608, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.21s/it]

{'eval_loss': 1.0246325731277466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.39s/it]


{'train_runtime': 21.5219, 'train_samples_per_second': 3.345, 'train_steps_per_second': 0.418, 'train_loss': 1.3989398744371202, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.13it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 24007.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 263.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.8129112720489502, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.227, 'eval_steps_per_second': 12.227, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.10s/it]

{'eval_loss': 1.190026879310608, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.23, 'eval_steps_per_second': 12.23, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.18s/it]

{'eval_loss': 1.0246325731277466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.383, 'eval_steps_per_second': 12.383, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.35s/it]


{'train_runtime': 21.1211, 'train_samples_per_second': 3.409, 'train_steps_per_second': 0.426, 'train_loss': 1.3989398744371202, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 11763.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 292.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 468.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8129112720489502, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.638, 'eval_steps_per_second': 11.638, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.16s/it]

{'eval_loss': 1.190026879310608, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0935, 'eval_samples_per_second': 10.699, 'eval_steps_per_second': 10.699, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.23s/it]

{'eval_loss': 1.0246325731277466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.531, 'eval_steps_per_second': 12.531, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.41s/it]


{'train_runtime': 21.6667, 'train_samples_per_second': 3.323, 'train_steps_per_second': 0.415, 'train_loss': 1.3989398744371202, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 200.02it/s]
100%|██████████| 1/1 [00:00<00:00, 200.02it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 12005.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 259.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.8129112720489502, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.66, 'eval_steps_per_second': 11.66, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.14s/it]

{'eval_loss': 1.190026879310608, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.044, 'eval_steps_per_second': 12.044, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.17s/it]

{'eval_loss': 1.0246325731277466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.018, 'eval_steps_per_second': 13.018, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.36s/it]


{'train_runtime': 21.238, 'train_samples_per_second': 3.39, 'train_steps_per_second': 0.424, 'train_loss': 1.3989398744371202, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.29it/s]
100%|██████████| 1/1 [00:00<00:00, 165.35it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 25001.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 264.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 1.5119916200637817, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8809523809523809, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.774, 'eval_steps_per_second': 11.774, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.61s/it]

{'eval_loss': 0.8924253582954407, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.64s/it]

{'eval_loss': 0.8690533638000488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.753, 'eval_steps_per_second': 11.753, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.96s/it]


{'train_runtime': 23.5532, 'train_samples_per_second': 3.184, 'train_steps_per_second': 0.509, 'train_loss': 1.2141791979471843, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 152.25it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 25001.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 279.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0935477018356323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.60s/it]

{'eval_loss': 1.0007336139678955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.253, 'eval_steps_per_second': 12.253, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.61s/it]

{'eval_loss': 1.0218474864959717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.644, 'eval_steps_per_second': 11.644, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.96s/it]


{'train_runtime': 23.4945, 'train_samples_per_second': 3.192, 'train_steps_per_second': 0.511, 'train_loss': 1.0437052249908447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 25001.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 253.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0935477018356323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.59s/it]

{'eval_loss': 1.0007336139678955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.373, 'eval_steps_per_second': 11.373, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.62s/it]

{'eval_loss': 1.0218474864959717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.272, 'eval_steps_per_second': 12.272, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.96s/it]


{'train_runtime': 23.5163, 'train_samples_per_second': 3.189, 'train_steps_per_second': 0.51, 'train_loss': 1.0437052249908447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 164.08it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12494.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 274.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0935477018356323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.84, 'eval_steps_per_second': 12.84, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.59s/it]

{'eval_loss': 1.0007336139678955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.60s/it]

{'eval_loss': 1.0218474864959717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.982, 'eval_steps_per_second': 11.982, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.95s/it]


{'train_runtime': 23.3633, 'train_samples_per_second': 3.21, 'train_steps_per_second': 0.514, 'train_loss': 1.0437052249908447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.21it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 25025.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 248.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0935477018356323, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.537, 'eval_steps_per_second': 12.537, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.60s/it]

{'eval_loss': 1.0007336139678955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.913, 'eval_steps_per_second': 12.913, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.63s/it]

{'eval_loss': 1.0218474864959717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.97s/it]


{'train_runtime': 23.6878, 'train_samples_per_second': 3.166, 'train_steps_per_second': 0.507, 'train_loss': 1.0437052249908447, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
100%|██████████| 1/1 [00:00<00:00, 165.15it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12999.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 253.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 486.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.092214822769165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 13.195, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.66s/it]

{'eval_loss': 0.995448887348175, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.67s/it]

{'eval_loss': 1.0110973119735718, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.98s/it]


{'train_runtime': 23.7922, 'train_samples_per_second': 3.278, 'train_steps_per_second': 0.504, 'train_loss': 1.037314732869466, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 25057.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 253.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.092214822769165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.788, 'eval_steps_per_second': 11.788, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.67s/it]

{'eval_loss': 0.995448887348175, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.521, 'eval_steps_per_second': 12.521, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.71s/it]

{'eval_loss': 1.0110973119735718, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.374, 'eval_steps_per_second': 13.374, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.01s/it]


{'train_runtime': 24.1442, 'train_samples_per_second': 3.231, 'train_steps_per_second': 0.497, 'train_loss': 1.037314732869466, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12696.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 244.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.092214822769165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.63s/it]

{'eval_loss': 0.995448887348175, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0959, 'eval_samples_per_second': 10.425, 'eval_steps_per_second': 10.425, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.68s/it]

{'eval_loss': 1.0110973119735718, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.98s/it]


{'train_runtime': 23.7813, 'train_samples_per_second': 3.28, 'train_steps_per_second': 0.505, 'train_loss': 1.037314732869466, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 165.27it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12783.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 248.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 486.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.092214822769165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.68s/it]

{'eval_loss': 0.995448887348175, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.84, 'eval_steps_per_second': 11.84, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.70s/it]

{'eval_loss': 1.0110973119735718, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.075, 'eval_samples_per_second': 13.341, 'eval_steps_per_second': 13.341, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.00s/it]


{'train_runtime': 24.0434, 'train_samples_per_second': 3.244, 'train_steps_per_second': 0.499, 'train_loss': 1.037314732869466, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
100%|██████████| 1/1 [00:00<00:00, 164.06it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12996.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.70 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 246.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 489.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.092214822769165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.681, 'eval_steps_per_second': 11.681, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.63s/it]

{'eval_loss': 0.995448887348175, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.791, 'eval_steps_per_second': 11.791, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.68s/it]

{'eval_loss': 1.0110973119735718, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.108, 'eval_steps_per_second': 13.108, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.99s/it]


{'train_runtime': 23.8499, 'train_samples_per_second': 3.27, 'train_steps_per_second': 0.503, 'train_loss': 1.037314732869466, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 27001.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 251.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0905712842941284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.72s/it]

{'eval_loss': 0.9930026531219482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1475, 'eval_samples_per_second': 6.779, 'eval_steps_per_second': 6.779, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.76s/it]

{'eval_loss': 1.0083450078964233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.515, 'eval_steps_per_second': 12.515, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.06s/it]


{'train_runtime': 24.6867, 'train_samples_per_second': 3.281, 'train_steps_per_second': 0.486, 'train_loss': 1.0364750226338704, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13517.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 240.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0905712842941284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.73s/it]

{'eval_loss': 0.9930026531219482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.176, 'eval_steps_per_second': 12.176, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.74s/it]

{'eval_loss': 1.0083450078964233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.109, 'eval_steps_per_second': 11.109, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.03s/it]


{'train_runtime': 24.417, 'train_samples_per_second': 3.317, 'train_steps_per_second': 0.491, 'train_loss': 1.0364750226338704, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13497.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 233.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0905712842941284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.984, 'eval_steps_per_second': 10.984, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.74s/it]

{'eval_loss': 0.9930026531219482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1577, 'eval_samples_per_second': 6.342, 'eval_steps_per_second': 6.342, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.79s/it]

{'eval_loss': 1.0083450078964233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1363, 'eval_samples_per_second': 7.334, 'eval_steps_per_second': 7.334, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.08s/it]


{'train_runtime': 24.9707, 'train_samples_per_second': 3.244, 'train_steps_per_second': 0.481, 'train_loss': 1.0364750226338704, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
100%|██████████| 1/1 [00:00<00:00, 152.39it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13499.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 263.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 277.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0905712842941284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1465, 'eval_samples_per_second': 6.828, 'eval_steps_per_second': 6.828, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.71s/it]

{'eval_loss': 0.9930026531219482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.246, 'eval_steps_per_second': 11.246, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.74s/it]

{'eval_loss': 1.0083450078964233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.02s/it]


{'train_runtime': 24.2695, 'train_samples_per_second': 3.338, 'train_steps_per_second': 0.494, 'train_loss': 1.0364750226338704, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.87it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 26982.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 220.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0905712842941284, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.504, 'eval_steps_per_second': 11.504, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.72s/it]

{'eval_loss': 0.9930026531219482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.121, 'eval_steps_per_second': 11.121, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.78s/it]

{'eval_loss': 1.0083450078964233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1328, 'eval_samples_per_second': 7.529, 'eval_steps_per_second': 7.529, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.05s/it]


{'train_runtime': 24.6252, 'train_samples_per_second': 3.289, 'train_steps_per_second': 0.487, 'train_loss': 1.0364750226338704, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 141.96it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 27982.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 238.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 1.0916048288345337, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0797, 'eval_samples_per_second': 12.54, 'eval_steps_per_second': 12.54, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.80s/it]

{'eval_loss': 0.9906946420669556, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.80s/it]

{'eval_loss': 1.0033020973205566, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.08s/it]


{'train_runtime': 24.9789, 'train_samples_per_second': 3.363, 'train_steps_per_second': 0.48, 'train_loss': 1.0341133276621501, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 28055.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 222.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0916048288345337, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.768, 'eval_steps_per_second': 11.768, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.79s/it]

{'eval_loss': 0.9906946420669556, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.373, 'eval_steps_per_second': 13.373, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.84s/it]

{'eval_loss': 1.0033020973205566, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.10s/it]


{'train_runtime': 25.178, 'train_samples_per_second': 3.336, 'train_steps_per_second': 0.477, 'train_loss': 1.0341133276621501, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13996.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 221.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 486.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0916048288345337, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.79s/it]

{'eval_loss': 0.9906946420669556, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.526, 'eval_steps_per_second': 12.526, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.82s/it]

{'eval_loss': 1.0033020973205566, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.992, 'eval_steps_per_second': 11.992, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.08s/it]


{'train_runtime': 25.0177, 'train_samples_per_second': 3.358, 'train_steps_per_second': 0.48, 'train_loss': 1.0341133276621501, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
100%|██████████| 1/1 [00:00<00:00, 164.26it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13740.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 251.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0916048288345337, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.197, 'eval_steps_per_second': 13.197, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.77s/it]

{'eval_loss': 0.9906946420669556, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.84s/it]

{'eval_loss': 1.0033020973205566, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.08s/it]


{'train_runtime': 25.0169, 'train_samples_per_second': 3.358, 'train_steps_per_second': 0.48, 'train_loss': 1.0341133276621501, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.27it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 28002.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 222.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0916048288345337, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.79s/it]

{'eval_loss': 0.9906946420669556, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.637, 'eval_steps_per_second': 11.637, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.81s/it]

{'eval_loss': 1.0033020973205566, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.23, 'eval_steps_per_second': 12.23, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.08s/it]


{'train_runtime': 24.9703, 'train_samples_per_second': 3.364, 'train_steps_per_second': 0.481, 'train_loss': 1.0341133276621501, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 29002.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 244.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 1.0932033061981201, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.142, 'eval_steps_per_second': 12.142, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.89s/it]

{'eval_loss': 0.9913417100906372, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.669, 'eval_steps_per_second': 11.669, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.91s/it]

{'eval_loss': 1.002610445022583, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 12.999, 'eval_steps_per_second': 12.999, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.16s/it]


{'train_runtime': 25.8985, 'train_samples_per_second': 3.359, 'train_steps_per_second': 0.463, 'train_loss': 1.0334021250406902, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14494.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 231.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0932033061981201, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.84s/it]

{'eval_loss': 0.9913417100906372, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.86s/it]

{'eval_loss': 1.002610445022583, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.11s/it]


{'train_runtime': 25.2938, 'train_samples_per_second': 3.44, 'train_steps_per_second': 0.474, 'train_loss': 1.0334021250406902, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 141.07it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14485.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 246.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0932033061981201, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.17, 'eval_steps_per_second': 13.17, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.85s/it]

{'eval_loss': 0.9913417100906372, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.188, 'eval_steps_per_second': 13.188, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.88s/it]

{'eval_loss': 1.002610445022583, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.504, 'eval_steps_per_second': 11.504, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.12s/it]


{'train_runtime': 25.4002, 'train_samples_per_second': 3.425, 'train_steps_per_second': 0.472, 'train_loss': 1.0334021250406902, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14501.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 225.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0932033061981201, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.076, 'eval_steps_per_second': 12.076, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.83s/it]

{'eval_loss': 0.9913417100906372, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.88s/it]

{'eval_loss': 1.002610445022583, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.853, 'eval_steps_per_second': 12.853, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.11s/it]


{'train_runtime': 25.3122, 'train_samples_per_second': 3.437, 'train_steps_per_second': 0.474, 'train_loss': 1.0334021250406902, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 166.39it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14501.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 233.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0932033061981201, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.032, 'eval_steps_per_second': 13.032, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.85s/it]

{'eval_loss': 0.9913417100906372, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.86s/it]

{'eval_loss': 1.002610445022583, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.12s/it]


{'train_runtime': 25.4174, 'train_samples_per_second': 3.423, 'train_steps_per_second': 0.472, 'train_loss': 1.0334021250406902, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 163.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 15002.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 246.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0934386253356934, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.87s/it]

{'eval_loss': 0.9897651076316833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.677, 'eval_steps_per_second': 12.677, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.92s/it]

{'eval_loss': 0.9996261596679688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.023, 'eval_steps_per_second': 13.023, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.13s/it]


{'train_runtime': 25.5865, 'train_samples_per_second': 3.517, 'train_steps_per_second': 0.469, 'train_loss': 1.0328563054402669, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14995.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 240.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0934386253356934, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.133, 'eval_steps_per_second': 12.133, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.91s/it]

{'eval_loss': 0.9897651076316833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.167, 'eval_steps_per_second': 13.167, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.93s/it]

{'eval_loss': 0.9996261596679688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.424, 'eval_steps_per_second': 12.424, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.15s/it]


{'train_runtime': 25.8105, 'train_samples_per_second': 3.487, 'train_steps_per_second': 0.465, 'train_loss': 1.0328563054402669, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
100%|██████████| 1/1 [00:00<00:00, 151.70it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 15008.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 218.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0934386253356934, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.04, 'eval_steps_per_second': 13.04, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.90s/it]

{'eval_loss': 0.9897651076316833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0905, 'eval_samples_per_second': 11.049, 'eval_steps_per_second': 11.049, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.91s/it]

{'eval_loss': 0.9996261596679688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.15s/it]


{'train_runtime': 25.7709, 'train_samples_per_second': 3.492, 'train_steps_per_second': 0.466, 'train_loss': 1.0328563054402669, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 165.17it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14749.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1705.69 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 238.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0934386253356934, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0785, 'eval_samples_per_second': 12.743, 'eval_steps_per_second': 12.743, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.88s/it]

{'eval_loss': 0.9897651076316833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.93s/it]

{'eval_loss': 0.9996261596679688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.849, 'eval_steps_per_second': 12.849, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.14s/it]


{'train_runtime': 25.7198, 'train_samples_per_second': 3.499, 'train_steps_per_second': 0.467, 'train_loss': 1.0328563054402669, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.81it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 15002.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 234.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0934386253356934, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.88s/it]

{'eval_loss': 0.9897651076316833, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.914, 'eval_steps_per_second': 11.914, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.91s/it]

{'eval_loss': 0.9996261596679688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0911, 'eval_samples_per_second': 10.98, 'eval_steps_per_second': 10.98, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.14s/it]


{'train_runtime': 25.6539, 'train_samples_per_second': 3.508, 'train_steps_per_second': 0.468, 'train_loss': 1.0328563054402669, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.52it/s]
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 10180.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 244.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0931822061538696, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.852, 'eval_steps_per_second': 12.852, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.95s/it]

{'eval_loss': 0.990702748298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.38, 'eval_steps_per_second': 11.38, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.98s/it]

{'eval_loss': 1.000610589981079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.368, 'eval_steps_per_second': 13.368, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.18s/it]


{'train_runtime': 26.1764, 'train_samples_per_second': 3.553, 'train_steps_per_second': 0.458, 'train_loss': 1.033319075902303, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15501.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 231.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 501.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0931822061538696, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.091, 'eval_steps_per_second': 12.091, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.95s/it]

{'eval_loss': 0.990702748298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.522, 'eval_steps_per_second': 12.522, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  2.00s/it]

{'eval_loss': 1.000610589981079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0904, 'eval_samples_per_second': 11.061, 'eval_steps_per_second': 11.061, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.19s/it]


{'train_runtime': 26.2251, 'train_samples_per_second': 3.546, 'train_steps_per_second': 0.458, 'train_loss': 1.033319075902303, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.01it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 31002.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.76 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 248.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0931822061538696, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.312, 'eval_steps_per_second': 11.312, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.98s/it]

{'eval_loss': 0.990702748298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.006, 'eval_steps_per_second': 13.006, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.01s/it]

{'eval_loss': 1.000610589981079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.21s/it]


{'train_runtime': 26.5628, 'train_samples_per_second': 3.501, 'train_steps_per_second': 0.452, 'train_loss': 1.033319075902303, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
100%|██████████| 1/1 [00:00<00:00, 131.90it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15243.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 223.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0931822061538696, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.663, 'eval_steps_per_second': 11.663, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.98s/it]

{'eval_loss': 0.990702748298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0787, 'eval_samples_per_second': 12.706, 'eval_steps_per_second': 12.706, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.98s/it]

{'eval_loss': 1.000610589981079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.854, 'eval_steps_per_second': 11.854, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.20s/it]


{'train_runtime': 26.3603, 'train_samples_per_second': 3.528, 'train_steps_per_second': 0.455, 'train_loss': 1.033319075902303, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.47it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15508.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.99 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 230.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 501.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0931822061538696, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.503, 'eval_steps_per_second': 11.503, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.96s/it]

{'eval_loss': 0.990702748298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.297, 'eval_steps_per_second': 12.297, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.98s/it]

{'eval_loss': 1.000610589981079, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.555, 'eval_steps_per_second': 13.555, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.18s/it]


{'train_runtime': 26.1265, 'train_samples_per_second': 3.56, 'train_steps_per_second': 0.459, 'train_loss': 1.033319075902303, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 178.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.42it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 16001.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 237.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0933171510696411, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.23, 'eval_steps_per_second': 12.23, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.03s/it]

{'eval_loss': 0.9916954040527344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.04s/it]

{'eval_loss': 1.0019299983978271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.374, 'eval_steps_per_second': 12.374, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.23s/it]


{'train_runtime': 26.7174, 'train_samples_per_second': 3.593, 'train_steps_per_second': 0.449, 'train_loss': 1.0335756142934163, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 164.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15707.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 223.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0933171510696411, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.05s/it]

{'eval_loss': 0.9916954040527344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.2, 'eval_steps_per_second': 13.2, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.05s/it]

{'eval_loss': 1.0019299983978271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.092, 'eval_steps_per_second': 13.092, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.23s/it]


{'train_runtime': 26.7988, 'train_samples_per_second': 3.582, 'train_steps_per_second': 0.448, 'train_loss': 1.0335756142934163, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.32it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15668.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 224.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0933171510696411, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.379, 'eval_steps_per_second': 12.379, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.02s/it]

{'eval_loss': 0.9916954040527344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.03s/it]

{'eval_loss': 1.0019299983978271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.22s/it]


{'train_runtime': 26.6374, 'train_samples_per_second': 3.604, 'train_steps_per_second': 0.45, 'train_loss': 1.0335756142934163, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 166.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 16001.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 232.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0933171510696411, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.01s/it]

{'eval_loss': 0.9916954040527344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.012, 'eval_steps_per_second': 13.012, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.04s/it]

{'eval_loss': 1.0019299983978271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.03, 'eval_steps_per_second': 13.03, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.22s/it]


{'train_runtime': 26.5807, 'train_samples_per_second': 3.612, 'train_steps_per_second': 0.451, 'train_loss': 1.0335756142934163, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.09it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 32002.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 228.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0933171510696411, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.00s/it]

{'eval_loss': 0.9916954040527344, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.05s/it]

{'eval_loss': 1.0019299983978271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.768, 'eval_steps_per_second': 11.768, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.22s/it]


{'train_runtime': 26.6364, 'train_samples_per_second': 3.604, 'train_steps_per_second': 0.451, 'train_loss': 1.0335756142934163, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16477.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 211.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9821258783340454, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.841, 'eval_steps_per_second': 11.841, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.56s/it]

{'eval_loss': 1.0000557899475098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.488, 'eval_steps_per_second': 11.488, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.58s/it]

{'eval_loss': 0.971116304397583, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.997, 'eval_steps_per_second': 10.997, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.91s/it]


{'train_runtime': 28.6387, 'train_samples_per_second': 3.457, 'train_steps_per_second': 0.524, 'train_loss': 0.9222593307495117, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.95it/s]
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 200.15it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16501.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 218.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.013596534729004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.054, 'eval_steps_per_second': 12.054, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:07,  1.55s/it]

{'eval_loss': 0.7882801294326782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.57s/it]

{'eval_loss': 0.8118278384208679, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.511, 'eval_steps_per_second': 11.511, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.93s/it]


{'train_runtime': 28.8838, 'train_samples_per_second': 3.428, 'train_steps_per_second': 0.519, 'train_loss': 1.071291732788086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.43it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16131.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 214.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.013596534729004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.238, 'eval_steps_per_second': 11.238, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.55s/it]

{'eval_loss': 0.7882801294326782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.56s/it]

{'eval_loss': 0.8118278384208679, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.202, 'eval_steps_per_second': 12.202, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.92s/it]


{'train_runtime': 28.7485, 'train_samples_per_second': 3.444, 'train_steps_per_second': 0.522, 'train_loss': 1.071291732788086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 162.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 33002.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.00 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 220.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.013596534729004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.55s/it]

{'eval_loss': 0.7882801294326782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.56s/it]

{'eval_loss': 0.8118278384208679, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.90s/it]


{'train_runtime': 28.5202, 'train_samples_per_second': 3.471, 'train_steps_per_second': 0.526, 'train_loss': 1.071291732788086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 165.50it/s]
100%|██████████| 1/1 [00:00<00:00, 165.27it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 33065.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 208.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.013596534729004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.55s/it]

{'eval_loss': 0.7882801294326782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.56s/it]

{'eval_loss': 0.8118278384208679, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.91s/it]


{'train_runtime': 28.6213, 'train_samples_per_second': 3.459, 'train_steps_per_second': 0.524, 'train_loss': 1.071291732788086, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 33994.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 226.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9901987910270691, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0743, 'eval_samples_per_second': 13.461, 'eval_steps_per_second': 13.461, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.61s/it]

{'eval_loss': 0.7719377279281616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.909, 'eval_steps_per_second': 11.909, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.62s/it]

{'eval_loss': 0.7985565066337585, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.227, 'eval_steps_per_second': 12.227, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


{'train_runtime': 29.1644, 'train_samples_per_second': 3.497, 'train_steps_per_second': 0.514, 'train_loss': 1.0715878804524739, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16997.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 225.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9901987910270691, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.7719375491142273, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.64s/it]

{'eval_loss': 0.7985565662384033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.647, 'eval_steps_per_second': 11.647, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.96s/it]


{'train_runtime': 29.4046, 'train_samples_per_second': 3.469, 'train_steps_per_second': 0.51, 'train_loss': 1.0715878804524739, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 197.44it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 34002.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 214.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9901987910270691, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.60s/it]

{'eval_loss': 0.7719377279281616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.63s/it]

{'eval_loss': 0.7985565066337585, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.34, 'eval_steps_per_second': 12.34, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.95s/it]


{'train_runtime': 29.2457, 'train_samples_per_second': 3.488, 'train_steps_per_second': 0.513, 'train_loss': 1.0715878804524739, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.99it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 163.90it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 17013.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 225.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 480.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9901987910270691, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.373, 'eval_steps_per_second': 11.373, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.62s/it]

{'eval_loss': 0.7719377279281616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.63s/it]

{'eval_loss': 0.7985565066337585, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.96s/it]


{'train_runtime': 29.3512, 'train_samples_per_second': 3.475, 'train_steps_per_second': 0.511, 'train_loss': 1.0715878804524739, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 140.95it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16640.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 225.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9901987910270691, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.924, 'eval_steps_per_second': 12.924, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.7719377279281616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.036, 'eval_steps_per_second': 12.036, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.62s/it]

{'eval_loss': 0.7985565066337585, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.933, 'eval_steps_per_second': 11.933, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.96s/it]


{'train_runtime': 29.3797, 'train_samples_per_second': 3.472, 'train_steps_per_second': 0.511, 'train_loss': 1.0715878804524739, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.96it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17501.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 218.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9883539080619812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.984, 'eval_steps_per_second': 11.984, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.65s/it]

{'eval_loss': 0.7761996388435364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.835, 'eval_steps_per_second': 11.835, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.68s/it]

{'eval_loss': 0.8017064929008484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.054, 'eval_steps_per_second': 12.054, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.98s/it]


{'train_runtime': 29.6608, 'train_samples_per_second': 3.54, 'train_steps_per_second': 0.506, 'train_loss': 1.0675331115722657, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
100%|██████████| 1/1 [00:00<00:00, 164.42it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17507.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 220.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9883539080619812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.503, 'eval_steps_per_second': 11.503, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.67s/it]

{'eval_loss': 0.7761996388435364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.631, 'eval_steps_per_second': 11.631, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.67s/it]

{'eval_loss': 0.8017064929008484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.365, 'eval_steps_per_second': 11.365, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.97s/it]


{'train_runtime': 29.6193, 'train_samples_per_second': 3.545, 'train_steps_per_second': 0.506, 'train_loss': 1.0675331115722657, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.40it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17499.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 820.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 203.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 491.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9883539080619812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.65s/it]

{'eval_loss': 0.7761996388435364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.68s/it]

{'eval_loss': 0.8017064929008484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.492, 'eval_steps_per_second': 11.492, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.98s/it]


{'train_runtime': 29.6367, 'train_samples_per_second': 3.543, 'train_steps_per_second': 0.506, 'train_loss': 1.0675331115722657, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 82.32it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 35010.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 213.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9883539080619812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.894, 'eval_steps_per_second': 11.894, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.66s/it]

{'eval_loss': 0.7761996388435364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.51, 'eval_steps_per_second': 12.51, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.68s/it]

{'eval_loss': 0.8017064929008484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1538, 'eval_samples_per_second': 6.502, 'eval_steps_per_second': 6.502, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.98s/it]


{'train_runtime': 29.7452, 'train_samples_per_second': 3.53, 'train_steps_per_second': 0.504, 'train_loss': 1.0675331115722657, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17495.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 221.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9883539080619812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.67s/it]

{'eval_loss': 0.7761996388435364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.174, 'eval_steps_per_second': 11.174, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.68s/it]

{'eval_loss': 0.8017064929008484, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.247, 'eval_steps_per_second': 11.247, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.98s/it]


{'train_runtime': 29.6561, 'train_samples_per_second': 3.541, 'train_steps_per_second': 0.506, 'train_loss': 1.0675331115722657, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.14it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 18009.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 223.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 343.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9866529107093811, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.529, 'eval_steps_per_second': 12.529, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.74s/it]

{'eval_loss': 0.7743657231330872, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.646, 'eval_steps_per_second': 11.646, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.78s/it]

{'eval_loss': 0.8016560673713684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.002, 'eval_steps_per_second': 13.002, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.03s/it]


{'train_runtime': 30.41, 'train_samples_per_second': 3.551, 'train_steps_per_second': 0.493, 'train_loss': 1.0723861694335937, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 123.84it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 18001.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 232.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 490.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9866529107093811, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.78s/it]

{'eval_loss': 0.7743657231330872, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.573, 'eval_steps_per_second': 11.573, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.79s/it]

{'eval_loss': 0.8016560673713684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.366, 'eval_steps_per_second': 13.366, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.06s/it]


{'train_runtime': 30.8754, 'train_samples_per_second': 3.498, 'train_steps_per_second': 0.486, 'train_loss': 1.0723861694335937, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 18001.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 211.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 465.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9866529107093811, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.357, 'eval_steps_per_second': 12.357, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.75s/it]

{'eval_loss': 0.7743657231330872, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.98, 'eval_steps_per_second': 11.98, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.75s/it]

{'eval_loss': 0.8016560673713684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.838, 'eval_steps_per_second': 11.838, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.02s/it]


{'train_runtime': 30.318, 'train_samples_per_second': 3.562, 'train_steps_per_second': 0.495, 'train_loss': 1.0723861694335937, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 18001.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 188.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9866529107093811, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.837, 'eval_steps_per_second': 12.837, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.77s/it]

{'eval_loss': 0.7743657231330872, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.80s/it]

{'eval_loss': 0.8016560673713684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


{'train_runtime': 30.7991, 'train_samples_per_second': 3.507, 'train_steps_per_second': 0.487, 'train_loss': 1.0723861694335937, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.51it/s]
100%|██████████| 1/1 [00:00<00:00, 139.42it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17979.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 208.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9866529107093811, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.76s/it]

{'eval_loss': 0.7743657231330872, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.77s/it]

{'eval_loss': 0.8016560673713684, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.04s/it]


{'train_runtime': 30.5882, 'train_samples_per_second': 3.531, 'train_steps_per_second': 0.49, 'train_loss': 1.0723861694335937, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 140.95it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18499.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.90 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 216.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9846165776252747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.7771996855735779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.86s/it]

{'eval_loss': 0.8088386654853821, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.287, 'eval_steps_per_second': 12.287, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.09s/it]


{'train_runtime': 31.3304, 'train_samples_per_second': 3.543, 'train_steps_per_second': 0.479, 'train_loss': 1.0713423411051433, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18501.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 205.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9846165776252747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.376, 'eval_steps_per_second': 12.376, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.80s/it]

{'eval_loss': 0.7771996855735779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.642, 'eval_steps_per_second': 11.642, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.83s/it]

{'eval_loss': 0.8088386654853821, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.198, 'eval_steps_per_second': 13.198, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.06s/it]


{'train_runtime': 30.9536, 'train_samples_per_second': 3.586, 'train_steps_per_second': 0.485, 'train_loss': 1.0713423411051433, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18503.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 195.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9846165776252747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 11.999, 'eval_steps_per_second': 11.999, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.79s/it]

{'eval_loss': 0.7771996855735779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.84s/it]

{'eval_loss': 0.8088386654853821, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


{'train_runtime': 30.7941, 'train_samples_per_second': 3.605, 'train_steps_per_second': 0.487, 'train_loss': 1.0713423411051433, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.35it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18119.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 207.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9846165776252747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.80s/it]

{'eval_loss': 0.7771996855735779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.686, 'eval_steps_per_second': 12.686, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.82s/it]

{'eval_loss': 0.8088386654853821, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.663, 'eval_steps_per_second': 11.663, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.06s/it]


{'train_runtime': 30.9475, 'train_samples_per_second': 3.587, 'train_steps_per_second': 0.485, 'train_loss': 1.0713423411051433, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18501.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.30 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 198.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9846165776252747, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.82s/it]

{'eval_loss': 0.7771996855735779, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.557, 'eval_steps_per_second': 11.557, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.83s/it]

{'eval_loss': 0.8088386654853821, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.0324, 'train_samples_per_second': 3.577, 'train_steps_per_second': 0.483, 'train_loss': 1.0713423411051433, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 12666.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 219.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9823573231697083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.945, 'eval_steps_per_second': 12.945, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.84s/it]

{'eval_loss': 0.7724955081939697, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.851, 'eval_steps_per_second': 12.851, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.85s/it]

{'eval_loss': 0.8093501925468445, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.0208, 'train_samples_per_second': 3.675, 'train_steps_per_second': 0.484, 'train_loss': 1.071099090576172, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.11it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18610.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 205.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9823573231697083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.979, 'eval_steps_per_second': 11.979, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.84s/it]

{'eval_loss': 0.7724955081939697, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.065, 'eval_steps_per_second': 13.065, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.89s/it]

{'eval_loss': 0.8093501925468445, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.09s/it]


{'train_runtime': 31.383, 'train_samples_per_second': 3.633, 'train_steps_per_second': 0.478, 'train_loss': 1.071099090576172, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
100%|██████████| 1/1 [00:00<00:00, 162.42it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 12668.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 193.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9823573231697083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.18, 'eval_steps_per_second': 13.18, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.85s/it]

{'eval_loss': 0.7724955081939697, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.104, 'eval_steps_per_second': 13.104, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.85s/it]

{'eval_loss': 0.8093501925468445, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.541, 'eval_steps_per_second': 13.541, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.2162, 'train_samples_per_second': 3.652, 'train_steps_per_second': 0.481, 'train_loss': 1.071099090576172, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
100%|██████████| 1/1 [00:00<00:00, 140.56it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18983.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 219.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 498.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9823573231697083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0743, 'eval_samples_per_second': 13.455, 'eval_steps_per_second': 13.455, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.84s/it]

{'eval_loss': 0.7724955081939697, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.686, 'eval_steps_per_second': 12.686, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.85s/it]

{'eval_loss': 0.8093501925468445, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.1134, 'train_samples_per_second': 3.664, 'train_steps_per_second': 0.482, 'train_loss': 1.071099090576172, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
100%|██████████| 1/1 [00:00<00:00, 141.09it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 12515.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 215.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 481.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9823573231697083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.066, 'eval_steps_per_second': 13.066, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.84s/it]

{'eval_loss': 0.7724955081939697, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.93, 'eval_steps_per_second': 12.93, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.87s/it]

{'eval_loss': 0.8093501925468445, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.988, 'eval_steps_per_second': 11.988, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.1495, 'train_samples_per_second': 3.66, 'train_steps_per_second': 0.482, 'train_loss': 1.071099090576172, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 41.28it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19138.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 209.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9802429676055908, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.93s/it]

{'eval_loss': 0.7753704786300659, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.357, 'eval_steps_per_second': 11.357, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.93s/it]

{'eval_loss': 0.8082249164581299, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.12s/it]


{'train_runtime': 31.7693, 'train_samples_per_second': 3.683, 'train_steps_per_second': 0.472, 'train_loss': 1.071480941772461, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19047.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 191.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 484.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 335.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9802429676055908, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.151, 'eval_steps_per_second': 12.151, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.93s/it]

{'eval_loss': 0.7753704786300659, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.0, 'eval_steps_per_second': 12.0, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.92s/it]

{'eval_loss': 0.8082249164581299, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.12s/it]


{'train_runtime': 31.8167, 'train_samples_per_second': 3.677, 'train_steps_per_second': 0.471, 'train_loss': 1.071480941772461, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 38993.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 194.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9802429676055908, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.789, 'eval_steps_per_second': 11.789, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.94s/it]

{'eval_loss': 0.7753704786300659, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.436, 'eval_steps_per_second': 11.436, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.93s/it]

{'eval_loss': 0.8082249164581299, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.13s/it]


{'train_runtime': 31.9719, 'train_samples_per_second': 3.659, 'train_steps_per_second': 0.469, 'train_loss': 1.071480941772461, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 123.97it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19496.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 200.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9802429676055908, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.0, 'eval_steps_per_second': 12.0, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.92s/it]

{'eval_loss': 0.7753704786300659, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0783, 'eval_samples_per_second': 12.771, 'eval_steps_per_second': 12.771, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.91s/it]

{'eval_loss': 0.8082249164581299, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.525, 'eval_steps_per_second': 12.525, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.11s/it]


{'train_runtime': 31.6653, 'train_samples_per_second': 3.695, 'train_steps_per_second': 0.474, 'train_loss': 1.071480941772461, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.51it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19482.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 204.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9802429676055908, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.857, 'eval_steps_per_second': 11.857, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.93s/it]

{'eval_loss': 0.7753704786300659, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.94s/it]

{'eval_loss': 0.8082249164581299, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.243, 'eval_steps_per_second': 12.243, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.13s/it]


{'train_runtime': 31.9867, 'train_samples_per_second': 3.658, 'train_steps_per_second': 0.469, 'train_loss': 1.071480941772461, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19613.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 199.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 484.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.9791349768638611, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.99s/it]

{'eval_loss': 0.7710819840431213, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.293, 'eval_steps_per_second': 12.293, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.99s/it]

{'eval_loss': 0.808600902557373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.914, 'eval_steps_per_second': 11.914, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.17s/it]


{'train_runtime': 32.5625, 'train_samples_per_second': 3.685, 'train_steps_per_second': 0.461, 'train_loss': 1.073611577351888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 40012.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 196.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9791349768638611, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.98s/it]

{'eval_loss': 0.7710819840431213, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.00s/it]

{'eval_loss': 0.808600902557373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.17s/it]


{'train_runtime': 32.4817, 'train_samples_per_second': 3.694, 'train_steps_per_second': 0.462, 'train_loss': 1.073611577351888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19999.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.76 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 201.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9791349768638611, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.198, 'eval_steps_per_second': 12.198, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.98s/it]

{'eval_loss': 0.7710819840431213, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.02s/it]

{'eval_loss': 0.808600902557373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.17s/it]


{'train_runtime': 32.5265, 'train_samples_per_second': 3.689, 'train_steps_per_second': 0.461, 'train_loss': 1.073611577351888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19989.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 196.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.9791349768638611, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.85, 'eval_steps_per_second': 12.85, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.99s/it]

{'eval_loss': 0.7710819840431213, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.774, 'eval_steps_per_second': 11.774, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.99s/it]

{'eval_loss': 0.808600902557373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.16s/it]


{'train_runtime': 32.4395, 'train_samples_per_second': 3.699, 'train_steps_per_second': 0.462, 'train_loss': 1.073611577351888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19645.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 201.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.9791349768638611, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.94s/it]

{'eval_loss': 0.7710819840431213, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.716, 'eval_steps_per_second': 11.716, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.98s/it]

{'eval_loss': 0.808600902557373, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.13s/it]


{'train_runtime': 31.9423, 'train_samples_per_second': 3.757, 'train_steps_per_second': 0.47, 'train_loss': 1.073611577351888, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
100%|██████████| 1/1 [00:00<00:00, 142.93it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20511.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.05 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 199.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8097968697547913, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.56s/it]

{'eval_loss': 0.7995384931564331, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.178, 'eval_steps_per_second': 11.178, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.57s/it]

{'eval_loss': 0.8005476593971252, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.382, 'eval_steps_per_second': 11.382, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.91s/it]


{'train_runtime': 34.3951, 'train_samples_per_second': 3.576, 'train_steps_per_second': 0.523, 'train_loss': 0.9778516557481554, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20499.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.76 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 201.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7980639338493347, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.289, 'eval_steps_per_second': 12.289, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.55s/it]

{'eval_loss': 0.9076198935508728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.382, 'eval_steps_per_second': 11.382, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.55s/it]

{'eval_loss': 0.8718659281730652, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.123, 'eval_steps_per_second': 11.123, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.91s/it]


{'train_runtime': 34.4519, 'train_samples_per_second': 3.57, 'train_steps_per_second': 0.522, 'train_loss': 0.9085747400919596, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 76.54it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20499.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 197.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7980639338493347, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.54s/it]

{'eval_loss': 0.9076198935508728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.839, 'eval_steps_per_second': 11.839, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:32<00:00,  1.55s/it]

{'eval_loss': 0.8718659281730652, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.927, 'eval_steps_per_second': 12.927, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.90s/it]


{'train_runtime': 34.2116, 'train_samples_per_second': 3.595, 'train_steps_per_second': 0.526, 'train_loss': 0.9085747400919596, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20481.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 200.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.7980639338493347, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.55s/it]

{'eval_loss': 0.9076198935508728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.357, 'eval_steps_per_second': 12.357, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.57s/it]

{'eval_loss': 0.8718659281730652, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.92s/it]


{'train_runtime': 34.6203, 'train_samples_per_second': 3.553, 'train_steps_per_second': 0.52, 'train_loss': 0.9085747400919596, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20477.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.42 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 206.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.7980639338493347, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.384, 'eval_steps_per_second': 11.384, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.53s/it]

{'eval_loss': 0.9076198935508728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.358, 'eval_steps_per_second': 13.358, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:32<00:00,  1.56s/it]

{'eval_loss': 0.8718659281730652, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.91s/it]


{'train_runtime': 34.3176, 'train_samples_per_second': 3.584, 'train_steps_per_second': 0.525, 'train_loss': 0.9085747400919596, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.80it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 21014.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 201.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8003029823303223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.231, 'eval_steps_per_second': 12.231, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.61s/it]

{'eval_loss': 0.9043110609054565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.62s/it]

{'eval_loss': 0.8684443831443787, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.95s/it]


{'train_runtime': 35.0531, 'train_samples_per_second': 3.595, 'train_steps_per_second': 0.514, 'train_loss': 0.9078727298312717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20627.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 191.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 317.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8003029823303223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.63s/it]

{'eval_loss': 0.9043110609054565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.63s/it]

{'eval_loss': 0.8684443831443787, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.96s/it]


{'train_runtime': 35.2657, 'train_samples_per_second': 3.573, 'train_steps_per_second': 0.51, 'train_loss': 0.9078727298312717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.65it/s]
100%|██████████| 1/1 [00:00<00:00, 163.76it/s]
100%|██████████| 1/1 [00:00<00:00, 151.99it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20989.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 193.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8003029823303223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.936, 'eval_steps_per_second': 12.936, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.59s/it]

{'eval_loss': 0.9043110609054565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.355, 'eval_steps_per_second': 12.355, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.59s/it]

{'eval_loss': 0.8684443831443787, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.93s/it]


{'train_runtime': 34.7111, 'train_samples_per_second': 3.63, 'train_steps_per_second': 0.519, 'train_loss': 0.9078727298312717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 13815.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 198.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8003029823303223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.918, 'eval_steps_per_second': 11.918, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.61s/it]

{'eval_loss': 0.9043110609054565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.62s/it]

{'eval_loss': 0.8684443831443787, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.35, 'eval_steps_per_second': 12.35, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.94s/it]


{'train_runtime': 34.8511, 'train_samples_per_second': 3.615, 'train_steps_per_second': 0.516, 'train_loss': 0.9078727298312717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 140.06it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 21021.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 470.06 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 192.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 485.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8003029823303223, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.069, 'eval_steps_per_second': 12.069, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.60s/it]

{'eval_loss': 0.9043110609054565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.36, 'eval_steps_per_second': 12.36, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.61s/it]

{'eval_loss': 0.8684443831443787, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.43, 'eval_steps_per_second': 11.43, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.94s/it]


{'train_runtime': 34.9511, 'train_samples_per_second': 3.605, 'train_steps_per_second': 0.515, 'train_loss': 0.9078727298312717, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21514.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 194.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.801582396030426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.48, 'eval_steps_per_second': 11.48, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.64s/it]

{'eval_loss': 0.904285192489624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.68s/it]

{'eval_loss': 0.8684636354446411, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.095, 'eval_samples_per_second': 10.53, 'eval_steps_per_second': 10.53, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.97s/it]


{'train_runtime': 35.4456, 'train_samples_per_second': 3.639, 'train_steps_per_second': 0.508, 'train_loss': 0.9079080157809787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 200.02it/s]
100%|██████████| 1/1 [00:00<00:00, 41.24it/s]
100%|██████████| 1/1 [00:00<00:00, 164.42it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 43013.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 189.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.801582396030426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.027, 'eval_steps_per_second': 12.027, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.65s/it]

{'eval_loss': 0.904285192489624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.66s/it]

{'eval_loss': 0.8684636354446411, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.033, 'eval_steps_per_second': 11.033, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.96s/it]


{'train_runtime': 35.3213, 'train_samples_per_second': 3.652, 'train_steps_per_second': 0.51, 'train_loss': 0.9079080157809787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21468.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 945.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 181.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.801582396030426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.67s/it]

{'eval_loss': 0.904285192489624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.67s/it]

{'eval_loss': 0.8684636354446411, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.774, 'eval_steps_per_second': 11.774, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.97s/it]


{'train_runtime': 35.4765, 'train_samples_per_second': 3.636, 'train_steps_per_second': 0.507, 'train_loss': 0.9079080157809787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 14146.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 196.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.801582396030426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0933, 'eval_samples_per_second': 10.722, 'eval_steps_per_second': 10.722, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.65s/it]

{'eval_loss': 0.904285192489624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.205, 'eval_steps_per_second': 12.205, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.66s/it]

{'eval_loss': 0.8684636354446411, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.985, 'eval_steps_per_second': 10.985, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.96s/it]


{'train_runtime': 35.3561, 'train_samples_per_second': 3.649, 'train_steps_per_second': 0.509, 'train_loss': 0.9079080157809787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21499.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.76 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 190.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.801582396030426, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.777, 'eval_steps_per_second': 11.777, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.65s/it]

{'eval_loss': 0.904285192489624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.66s/it]

{'eval_loss': 0.8684636354446411, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.117, 'eval_steps_per_second': 11.117, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.97s/it]


{'train_runtime': 35.4373, 'train_samples_per_second': 3.64, 'train_steps_per_second': 0.508, 'train_loss': 0.9079080157809787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21996.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 187.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8022642731666565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0918, 'eval_samples_per_second': 10.896, 'eval_steps_per_second': 10.896, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.73s/it]

{'eval_loss': 0.9050158262252808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.537, 'eval_steps_per_second': 12.537, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.69s/it]

{'eval_loss': 0.8690937757492065, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.1182, 'eval_samples_per_second': 8.461, 'eval_steps_per_second': 8.461, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.8879, 'train_samples_per_second': 3.678, 'train_steps_per_second': 0.502, 'train_loss': 0.9066844516330295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 197.57it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21627.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 191.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8022642731666565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.73s/it]

{'eval_loss': 0.9050158262252808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.76s/it]

{'eval_loss': 0.8690937757492065, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.005, 'eval_steps_per_second': 12.005, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.02s/it]


{'train_runtime': 36.3144, 'train_samples_per_second': 3.635, 'train_steps_per_second': 0.496, 'train_loss': 0.9066844516330295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 165.27it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 22009.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 190.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8022642731666565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0981, 'eval_samples_per_second': 10.196, 'eval_steps_per_second': 10.196, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.69s/it]

{'eval_loss': 0.9050158262252808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.1002, 'eval_samples_per_second': 9.977, 'eval_steps_per_second': 9.977, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.71s/it]

{'eval_loss': 0.8690937757492065, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.922, 'eval_steps_per_second': 11.922, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.782, 'train_samples_per_second': 3.689, 'train_steps_per_second': 0.503, 'train_loss': 0.9066844516330295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 22001.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 193.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8022642731666565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.861, 'eval_steps_per_second': 11.861, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.72s/it]

{'eval_loss': 0.9050158262252808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.72s/it]

{'eval_loss': 0.8690937757492065, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.428, 'eval_steps_per_second': 12.428, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.8287, 'train_samples_per_second': 3.684, 'train_steps_per_second': 0.502, 'train_loss': 0.9066844516330295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21998.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 192.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 486.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.8022642731666565, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.906, 'eval_steps_per_second': 11.906, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.70s/it]

{'eval_loss': 0.9050158262252808, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.1212, 'eval_samples_per_second': 8.252, 'eval_steps_per_second': 8.252, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.74s/it]

{'eval_loss': 0.8690937757492065, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.081, 'eval_steps_per_second': 12.081, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  2.00s/it]


{'train_runtime': 35.9834, 'train_samples_per_second': 3.668, 'train_steps_per_second': 0.5, 'train_loss': 0.9066844516330295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.01it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22498.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 180.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8025786280632019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.78s/it]

{'eval_loss': 0.9042548537254333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.687, 'eval_steps_per_second': 11.687, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.78s/it]

{'eval_loss': 0.8683104515075684, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.968, 'eval_steps_per_second': 11.968, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.02s/it]


{'train_runtime': 36.3969, 'train_samples_per_second': 3.709, 'train_steps_per_second': 0.495, 'train_loss': 0.9061977598402235, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.32it/s]
100%|██████████| 1/1 [00:00<00:00, 41.11it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 14826.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 188.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8025786280632019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.79s/it]

{'eval_loss': 0.9042548537254333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.80s/it]

{'eval_loss': 0.8683104515075684, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.08, 'eval_samples_per_second': 12.497, 'eval_steps_per_second': 12.497, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.8703, 'train_samples_per_second': 3.661, 'train_steps_per_second': 0.488, 'train_loss': 0.9061977598402235, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 21931.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 178.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8025786280632019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.78s/it]

{'eval_loss': 0.9042548537254333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.203, 'eval_steps_per_second': 12.203, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.78s/it]

{'eval_loss': 0.8683104515075684, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.03s/it]


{'train_runtime': 36.601, 'train_samples_per_second': 3.688, 'train_steps_per_second': 0.492, 'train_loss': 0.9061977598402235, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22101.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 189.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8025786280632019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.783, 'eval_steps_per_second': 11.783, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.80s/it]

{'eval_loss': 0.9042548537254333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.78s/it]

{'eval_loss': 0.8683104515075684, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.04s/it]


{'train_runtime': 36.6728, 'train_samples_per_second': 3.681, 'train_steps_per_second': 0.491, 'train_loss': 0.9061977598402235, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 140.99it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 14999.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.01 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 183.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8025786280632019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.357, 'eval_steps_per_second': 12.357, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.80s/it]

{'eval_loss': 0.9042548537254333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.82s/it]

{'eval_loss': 0.8683104515075684, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.9542, 'train_samples_per_second': 3.653, 'train_steps_per_second': 0.487, 'train_loss': 0.9061977598402235, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 23015.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 176.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 238.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8026782274246216, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.855, 'eval_steps_per_second': 12.855, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.83s/it]

{'eval_loss': 0.9042112231254578, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.849, 'eval_steps_per_second': 12.849, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.86s/it]

{'eval_loss': 0.8688139915466309, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.1046, 'train_samples_per_second': 3.719, 'train_steps_per_second': 0.485, 'train_loss': 0.9064003626505533, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.15it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22568.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 176.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8026782274246216, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.083, 'eval_steps_per_second': 13.083, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.9042112231254578, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.83s/it]

{'eval_loss': 0.8688139915466309, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0736, 'eval_samples_per_second': 13.592, 'eval_steps_per_second': 13.592, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.8518, 'train_samples_per_second': 3.745, 'train_steps_per_second': 0.488, 'train_loss': 0.9064003626505533, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 33.07it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 165.32it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22996.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 181.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8026782274246216, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.85s/it]

{'eval_loss': 0.9042112231254578, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.363, 'eval_steps_per_second': 13.363, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.85s/it]

{'eval_loss': 0.8688139915466309, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.855, 'eval_steps_per_second': 12.855, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.9793, 'train_samples_per_second': 3.732, 'train_steps_per_second': 0.487, 'train_loss': 0.9064003626505533, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 15332.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 923.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 881.53 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 180.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8026782274246216, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.9042112231254578, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.194, 'eval_steps_per_second': 13.194, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.84s/it]

{'eval_loss': 0.8688139915466309, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.906, 'eval_steps_per_second': 12.906, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.1683, 'train_samples_per_second': 3.713, 'train_steps_per_second': 0.484, 'train_loss': 0.9064003626505533, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 23012.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 183.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8026782274246216, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.535, 'eval_steps_per_second': 12.535, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.85s/it]

{'eval_loss': 0.9042112231254578, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.001, 'eval_steps_per_second': 13.001, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.86s/it]

{'eval_loss': 0.8688139915466309, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.023, 'eval_steps_per_second': 13.023, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2943, 'train_samples_per_second': 3.7, 'train_steps_per_second': 0.483, 'train_loss': 0.9064003626505533, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 197.40it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23521.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 175.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8025933504104614, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.89s/it]

{'eval_loss': 0.9037628173828125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.88s/it]

{'eval_loss': 0.8682777285575867, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.3256, 'train_samples_per_second': 3.778, 'train_steps_per_second': 0.482, 'train_loss': 0.9059072070651584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23479.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 171.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8025933504104614, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.526, 'eval_steps_per_second': 12.526, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.91s/it]

{'eval_loss': 0.9037628173828125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.991, 'eval_steps_per_second': 10.991, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.90s/it]

{'eval_loss': 0.8682777285575867, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 11.999, 'eval_steps_per_second': 11.999, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.6902, 'train_samples_per_second': 3.741, 'train_steps_per_second': 0.478, 'train_loss': 0.9059072070651584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 163.90it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23504.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.10 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 179.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8025933504104614, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.89s/it]

{'eval_loss': 0.9037628173828125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.01, 'eval_steps_per_second': 13.01, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.93s/it]

{'eval_loss': 0.8682777285575867, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.119, 'eval_steps_per_second': 12.119, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.7373, 'train_samples_per_second': 3.736, 'train_steps_per_second': 0.477, 'train_loss': 0.9059072070651584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
100%|██████████| 1/1 [00:00<00:00, 142.36it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23032.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 172.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8025933504104614, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.376, 'eval_steps_per_second': 11.376, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.92s/it]

{'eval_loss': 0.9037628173828125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 13.195, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.91s/it]

{'eval_loss': 0.8682777285575867, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.208, 'eval_steps_per_second': 12.208, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.8513, 'train_samples_per_second': 3.725, 'train_steps_per_second': 0.476, 'train_loss': 0.9059072070651584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 23501.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 183.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8025933504104614, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.672, 'eval_steps_per_second': 12.672, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.89s/it]

{'eval_loss': 0.9037628173828125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.92s/it]

{'eval_loss': 0.8682777285575867, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.79, 'eval_steps_per_second': 11.79, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.6983, 'train_samples_per_second': 3.74, 'train_steps_per_second': 0.477, 'train_loss': 0.9059072070651584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 164.47it/s]
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 24018.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 172.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 437.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8028197288513184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.698, 'eval_steps_per_second': 11.698, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.96s/it]

{'eval_loss': 0.9033061861991882, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.95s/it]

{'eval_loss': 0.8674927949905396, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.12s/it]


{'train_runtime': 38.0879, 'train_samples_per_second': 3.781, 'train_steps_per_second': 0.473, 'train_loss': 0.9062545564439561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 161.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.06it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 23998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.25 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 167.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8028197288513184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.92s/it]

{'eval_loss': 0.9033061861991882, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.95s/it]

{'eval_loss': 0.8674927949905396, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.8445, 'train_samples_per_second': 3.805, 'train_steps_per_second': 0.476, 'train_loss': 0.9062545564439561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.24it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 24018.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.70 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 171.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8028197288513184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.96s/it]

{'eval_loss': 0.9033061861991882, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.05, 'eval_steps_per_second': 12.05, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.94s/it]

{'eval_loss': 0.8674927949905396, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.192, 'eval_steps_per_second': 13.192, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.11s/it]


{'train_runtime': 38.0533, 'train_samples_per_second': 3.784, 'train_steps_per_second': 0.473, 'train_loss': 0.9062545564439561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 24016.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 168.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8028197288513184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.94s/it]

{'eval_loss': 0.9033061861991882, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.97s/it]

{'eval_loss': 0.8674927949905396, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.11s/it]


{'train_runtime': 37.9551, 'train_samples_per_second': 3.794, 'train_steps_per_second': 0.474, 'train_loss': 0.9062545564439561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 23524.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 172.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8028197288513184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.369, 'eval_steps_per_second': 11.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.98s/it]

{'eval_loss': 0.9033061861991882, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.433, 'eval_steps_per_second': 12.433, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  2.00s/it]

{'eval_loss': 0.8674927949905396, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.15s/it]


{'train_runtime': 38.6599, 'train_samples_per_second': 3.725, 'train_steps_per_second': 0.466, 'train_loss': 0.9062545564439561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
100%|██████████| 1/1 [00:00<00:00, 153.15it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24498.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 164.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.793695330619812, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.018, 'eval_steps_per_second': 13.018, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.54s/it]

{'eval_loss': 0.8718662858009338, 'eval_precision': 1.0, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.377, 'eval_steps_per_second': 12.377, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.56s/it]

{'eval_loss': 0.8353418707847595, 'eval_precision': 0.5, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.22222222222222224, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.91s/it]


{'train_runtime': 40.0572, 'train_samples_per_second': 3.67, 'train_steps_per_second': 0.524, 'train_loss': 0.8222624460856119, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 68.26it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24498.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 171.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 487.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.872771680355072, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.58s/it]

{'eval_loss': 0.8368541598320007, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.174, 'eval_steps_per_second': 11.174, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.60s/it]

{'eval_loss': 0.8177103400230408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.24, 'eval_steps_per_second': 11.24, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.271, 'train_samples_per_second': 3.562, 'train_steps_per_second': 0.509, 'train_loss': 0.7988474709647042, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 83.03it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24501.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 164.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.872771680355072, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.651, 'eval_steps_per_second': 11.651, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.54s/it]

{'eval_loss': 0.8368541598320007, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.773, 'eval_steps_per_second': 11.773, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.55s/it]

{'eval_loss': 0.8177103400230408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.297, 'eval_steps_per_second': 12.297, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.91s/it]


{'train_runtime': 40.1441, 'train_samples_per_second': 3.662, 'train_steps_per_second': 0.523, 'train_loss': 0.7988474709647042, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24516.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 163.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.872771680355072, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.651, 'eval_steps_per_second': 11.651, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.59s/it]

{'eval_loss': 0.8368541598320007, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.379, 'eval_steps_per_second': 11.379, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.60s/it]

{'eval_loss': 0.8177103400230408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.002, 'eval_steps_per_second': 13.002, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.96s/it]


{'train_runtime': 41.1252, 'train_samples_per_second': 3.574, 'train_steps_per_second': 0.511, 'train_loss': 0.7988474709647042, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24026.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 165.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 501.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 471.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.872771680355072, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.392, 'eval_steps_per_second': 11.392, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.55s/it]

{'eval_loss': 0.8368541598320007, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.52, 'eval_steps_per_second': 11.52, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.55s/it]

{'eval_loss': 0.8177103400230408, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.089, 'eval_steps_per_second': 13.089, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.92s/it]


{'train_runtime': 40.314, 'train_samples_per_second': 3.646, 'train_steps_per_second': 0.521, 'train_loss': 0.7988474709647042, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 98.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 25013.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 168.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8713042140007019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.65s/it]

{'eval_loss': 0.8333522081375122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.498, 'eval_steps_per_second': 11.498, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.63s/it]

{'eval_loss': 0.8136709332466125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0744, 'eval_samples_per_second': 13.44, 'eval_steps_per_second': 13.44, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.2811, 'train_samples_per_second': 3.634, 'train_steps_per_second': 0.509, 'train_loss': 0.7945228758312407, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 140.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16436.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 166.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8713042140007019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.60s/it]

{'eval_loss': 0.8333522081375122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.376, 'eval_steps_per_second': 12.376, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.60s/it]

{'eval_loss': 0.8136709332466125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.94s/it]


{'train_runtime': 40.6657, 'train_samples_per_second': 3.689, 'train_steps_per_second': 0.516, 'train_loss': 0.7945228758312407, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.16it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16671.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 966.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.03 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 158.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8713042140007019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 13.195, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.62s/it]

{'eval_loss': 0.8333522081375122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.137, 'eval_steps_per_second': 12.137, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.61s/it]

{'eval_loss': 0.8136709332466125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.855, 'eval_steps_per_second': 11.855, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.94s/it]


{'train_runtime': 40.8122, 'train_samples_per_second': 3.675, 'train_steps_per_second': 0.515, 'train_loss': 0.7945228758312407, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16674.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 163.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 487.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8713042140007019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.646, 'eval_steps_per_second': 11.646, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.60s/it]

{'eval_loss': 0.8333522081375122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.52, 'eval_steps_per_second': 11.52, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.61s/it]

{'eval_loss': 0.8136709332466125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.684, 'eval_steps_per_second': 12.684, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.94s/it]


{'train_runtime': 40.7073, 'train_samples_per_second': 3.685, 'train_steps_per_second': 0.516, 'train_loss': 0.7945228758312407, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 24983.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 161.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8713042140007019, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.242, 'eval_steps_per_second': 11.242, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.63s/it]

{'eval_loss': 0.8333522081375122, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.772, 'eval_steps_per_second': 11.772, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.63s/it]

{'eval_loss': 0.8136709332466125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.96s/it]


{'train_runtime': 41.0773, 'train_samples_per_second': 3.652, 'train_steps_per_second': 0.511, 'train_loss': 0.7945228758312407, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 16750.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 158.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.872270405292511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1083, 'eval_samples_per_second': 9.235, 'eval_steps_per_second': 9.235, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.66s/it]

{'eval_loss': 0.836794376373291, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.67s/it]

{'eval_loss': 0.8172408938407898, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.359, 'eval_steps_per_second': 11.359, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.6077, 'train_samples_per_second': 3.677, 'train_steps_per_second': 0.505, 'train_loss': 0.7934536706833613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.00it/s]
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 25501.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 161.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.872270405292511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.984, 'eval_steps_per_second': 10.984, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.66s/it]

{'eval_loss': 0.836794376373291, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.904, 'eval_steps_per_second': 11.904, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.65s/it]

{'eval_loss': 0.8172408938407898, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.763, 'eval_steps_per_second': 11.763, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.96s/it]


{'train_runtime': 41.209, 'train_samples_per_second': 3.713, 'train_steps_per_second': 0.51, 'train_loss': 0.7934536706833613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.82it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 16785.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 149.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.872270405292511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1315, 'eval_samples_per_second': 7.607, 'eval_steps_per_second': 7.607, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.67s/it]

{'eval_loss': 0.836794376373291, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.094, 'eval_samples_per_second': 10.633, 'eval_steps_per_second': 10.633, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.65s/it]

{'eval_loss': 0.8172408938407898, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.364, 'eval_steps_per_second': 11.364, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.4037, 'train_samples_per_second': 3.695, 'train_steps_per_second': 0.507, 'train_loss': 0.7934536706833613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 25498.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.16 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 164.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.872270405292511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.501, 'eval_steps_per_second': 11.501, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.65s/it]

{'eval_loss': 0.836794376373291, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1284, 'eval_samples_per_second': 7.788, 'eval_steps_per_second': 7.788, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.64s/it]

{'eval_loss': 0.8172408938407898, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.48, 'eval_steps_per_second': 11.48, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.95s/it]


{'train_runtime': 41.0043, 'train_samples_per_second': 3.731, 'train_steps_per_second': 0.512, 'train_loss': 0.7934536706833613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 24890.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 949.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 161.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.872270405292511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.65s/it]

{'eval_loss': 0.836794376373291, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.011, 'eval_steps_per_second': 12.011, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.65s/it]

{'eval_loss': 0.8172408938407898, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.079, 'eval_samples_per_second': 12.663, 'eval_steps_per_second': 12.663, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.96s/it]


{'train_runtime': 41.0994, 'train_samples_per_second': 3.723, 'train_steps_per_second': 0.511, 'train_loss': 0.7934536706833613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 151.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 25542.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 162.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8717120885848999, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0933, 'eval_samples_per_second': 10.717, 'eval_steps_per_second': 10.717, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.71s/it]

{'eval_loss': 0.8351272344589233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.53, 'eval_steps_per_second': 12.53, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.74s/it]

{'eval_loss': 0.8143396973609924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.845, 'eval_steps_per_second': 12.845, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  2.00s/it]


{'train_runtime': 41.9652, 'train_samples_per_second': 3.717, 'train_steps_per_second': 0.5, 'train_loss': 0.7927762894403367, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 164.09it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 17124.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 164.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8717120885848999, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.626, 'eval_steps_per_second': 11.626, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.70s/it]

{'eval_loss': 0.8351272344589233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1107, 'eval_samples_per_second': 9.037, 'eval_steps_per_second': 9.037, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.71s/it]

{'eval_loss': 0.8143396973609924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.129, 'eval_steps_per_second': 11.129, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.6188, 'train_samples_per_second': 3.748, 'train_steps_per_second': 0.505, 'train_loss': 0.7927762894403367, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 89.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 17334.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 165.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8717120885848999, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.70s/it]

{'eval_loss': 0.8351272344589233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1043, 'eval_samples_per_second': 9.588, 'eval_steps_per_second': 9.588, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.70s/it]

{'eval_loss': 0.8143396973609924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.87, 'eval_steps_per_second': 10.87, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.99s/it]


{'train_runtime': 41.7669, 'train_samples_per_second': 3.735, 'train_steps_per_second': 0.503, 'train_loss': 0.7927762894403367, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 25977.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 167.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8717120885848999, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.72s/it]

{'eval_loss': 0.8351272344589233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.74s/it]

{'eval_loss': 0.8143396973609924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.774, 'eval_steps_per_second': 11.774, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  2.00s/it]


{'train_runtime': 41.8953, 'train_samples_per_second': 3.724, 'train_steps_per_second': 0.501, 'train_loss': 0.7927762894403367, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.09it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 25494.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 153.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8717120885848999, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.039, 'eval_steps_per_second': 12.039, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.72s/it]

{'eval_loss': 0.8351272344589233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.993, 'eval_steps_per_second': 10.993, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.74s/it]

{'eval_loss': 0.8143396973609924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.52, 'eval_steps_per_second': 11.52, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.99s/it]


{'train_runtime': 41.8539, 'train_samples_per_second': 3.727, 'train_steps_per_second': 0.502, 'train_loss': 0.7927762894403367, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 141.83it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26505.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 158.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8731226921081543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.7, 'eval_steps_per_second': 11.7, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.81s/it]

{'eval_loss': 0.8361870646476746, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.79s/it]
100%|██████████| 21/21 [00:41<00:00,  1.79s/it]

{'eval_loss': 0.8162943720817566, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.05s/it]


{'train_runtime': 43.0586, 'train_samples_per_second': 3.693, 'train_steps_per_second': 0.488, 'train_loss': 0.7933249700637091, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 25951.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 153.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream tas

{'eval_loss': 0.8731226921081543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.78s/it]

{'eval_loss': 0.8361870646476746, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.854, 'eval_steps_per_second': 12.854, 'epoch': 2.0}


100%|██████████| 21/21 [00:40<00:00,  1.77s/it]
100%|██████████| 21/21 [00:41<00:00,  1.77s/it]

{'eval_loss': 0.8162943720817566, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.207, 'eval_steps_per_second': 12.207, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.01s/it]


{'train_runtime': 42.3028, 'train_samples_per_second': 3.759, 'train_steps_per_second': 0.496, 'train_loss': 0.7933249700637091, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 17667.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.73 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 161.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8731226921081543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.797, 'eval_steps_per_second': 11.797, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.80s/it]

{'eval_loss': 0.8361870646476746, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.82s/it]
100%|██████████| 21/21 [00:41<00:00,  1.82s/it]

{'eval_loss': 0.8162943720817566, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.724, 'eval_steps_per_second': 11.724, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.05s/it]


{'train_runtime': 43.0402, 'train_samples_per_second': 3.694, 'train_steps_per_second': 0.488, 'train_loss': 0.7933249700637091, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.93it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26501.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 159.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8731226921081543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.80s/it]

{'eval_loss': 0.8361870646476746, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.78s/it]
100%|██████████| 21/21 [00:41<00:00,  1.78s/it]

{'eval_loss': 0.8162943720817566, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.287, 'eval_steps_per_second': 12.287, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.9345, 'train_samples_per_second': 3.703, 'train_steps_per_second': 0.489, 'train_loss': 0.7933249700637091, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26498.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 154.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8731226921081543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.82s/it]

{'eval_loss': 0.8361870646476746, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.129, 'eval_steps_per_second': 12.129, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.82s/it]
100%|██████████| 21/21 [00:41<00:00,  1.82s/it]

{'eval_loss': 0.8162943720817566, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.779, 'eval_steps_per_second': 11.779, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.06s/it]


{'train_runtime': 43.1964, 'train_samples_per_second': 3.681, 'train_steps_per_second': 0.486, 'train_loss': 0.7933249700637091, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 26410.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 152.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.8728398084640503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0762, 'eval_samples_per_second': 13.12, 'eval_steps_per_second': 13.12, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.81s/it]

{'eval_loss': 0.8351021409034729, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 12.999, 'eval_steps_per_second': 12.999, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.85s/it]
100%|██████████| 21/21 [00:41<00:00,  1.85s/it]

{'eval_loss': 0.8147901296615601, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.06s/it]


{'train_runtime': 43.2677, 'train_samples_per_second': 3.744, 'train_steps_per_second': 0.485, 'train_loss': 0.7932256062825521, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 150.08it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 17783.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 149.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8728398084640503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.85s/it]

{'eval_loss': 0.8351021409034729, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.125, 'eval_steps_per_second': 12.125, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.86s/it]
100%|██████████| 21/21 [00:41<00:00,  1.86s/it]

{'eval_loss': 0.8147901296615601, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.018, 'eval_steps_per_second': 13.018, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.06s/it]


{'train_runtime': 43.2666, 'train_samples_per_second': 3.744, 'train_steps_per_second': 0.485, 'train_loss': 0.7932256062825521, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 164.45it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 27001.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1983.12 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 155.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.8728398084640503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.122, 'eval_steps_per_second': 12.122, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.84s/it]

{'eval_loss': 0.8351021409034729, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.847, 'eval_steps_per_second': 12.847, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.84s/it]
100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.8147901296615601, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.915, 'eval_steps_per_second': 11.915, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.05s/it]


{'train_runtime': 42.9644, 'train_samples_per_second': 3.771, 'train_steps_per_second': 0.489, 'train_loss': 0.7932256062825521, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 27011.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 158.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 322.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.8728398084640503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.156, 'eval_steps_per_second': 12.156, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.85s/it]

{'eval_loss': 0.8351021409034729, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 2.0}


100%|██████████| 21/21 [00:42<00:00,  1.86s/it]
100%|██████████| 21/21 [00:42<00:00,  1.86s/it]

{'eval_loss': 0.8147901296615601, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.4761, 'train_samples_per_second': 3.726, 'train_steps_per_second': 0.483, 'train_loss': 0.7932256062825521, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 27005.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 153.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8728398084640503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.554, 'eval_steps_per_second': 13.554, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.84s/it]

{'eval_loss': 0.8351021409034729, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.84s/it]
100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.8147901296615601, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.692, 'eval_steps_per_second': 12.692, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.05s/it]


{'train_runtime': 43.1089, 'train_samples_per_second': 3.758, 'train_steps_per_second': 0.487, 'train_loss': 0.7932256062825521, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 151.98it/s]
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 18078.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 156.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8727377653121948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.91s/it]

{'eval_loss': 0.8327656984329224, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.243, 'eval_steps_per_second': 12.243, 'epoch': 2.0}


100%|██████████| 21/21 [00:42<00:00,  1.90s/it]
100%|██████████| 21/21 [00:42<00:00,  1.90s/it]

{'eval_loss': 0.8116428852081299, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.366, 'eval_steps_per_second': 11.366, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.09s/it]


{'train_runtime': 43.8723, 'train_samples_per_second': 3.761, 'train_steps_per_second': 0.479, 'train_loss': 0.7932476770310175, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 165.06it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 53055.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 154.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 317.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.8727377653121948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.86s/it]

{'eval_loss': 0.8327656984329224, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0739, 'eval_samples_per_second': 13.535, 'eval_steps_per_second': 13.535, 'epoch': 2.0}


100%|██████████| 21/21 [00:41<00:00,  1.88s/it]
100%|██████████| 21/21 [00:41<00:00,  1.88s/it]

{'eval_loss': 0.8116428852081299, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.05s/it]


{'train_runtime': 43.1118, 'train_samples_per_second': 3.827, 'train_steps_per_second': 0.487, 'train_loss': 0.7932476770310175, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.81it/s]
100%|██████████| 1/1 [00:00<00:00, 122.88it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 27475.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 156.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 486.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8727377653121948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.079, 'eval_steps_per_second': 12.079, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.87s/it]

{'eval_loss': 0.8327656984329224, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.847, 'eval_steps_per_second': 12.847, 'epoch': 2.0}


100%|██████████| 21/21 [00:42<00:00,  1.90s/it]
100%|██████████| 21/21 [00:42<00:00,  1.90s/it]

{'eval_loss': 0.8116428852081299, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.14, 'eval_steps_per_second': 12.14, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.4136, 'train_samples_per_second': 3.801, 'train_steps_per_second': 0.484, 'train_loss': 0.7932476770310175, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 27515.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 154.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8727377653121948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.624, 'eval_steps_per_second': 11.624, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.88s/it]

{'eval_loss': 0.8327656984329224, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.372, 'eval_steps_per_second': 13.372, 'epoch': 2.0}


100%|██████████| 21/21 [00:42<00:00,  1.89s/it]
100%|██████████| 21/21 [00:42<00:00,  1.89s/it]

{'eval_loss': 0.8116428852081299, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.079, 'eval_steps_per_second': 12.079, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.4957, 'train_samples_per_second': 3.793, 'train_steps_per_second': 0.483, 'train_loss': 0.7932476770310175, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 18321.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 155.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream tas

{'eval_loss': 0.8727377653121948, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.098, 'eval_steps_per_second': 13.098, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.91s/it]

{'eval_loss': 0.8327656984329224, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.006, 'eval_steps_per_second': 12.006, 'epoch': 2.0}


100%|██████████| 21/21 [00:42<00:00,  1.90s/it]
100%|██████████| 21/21 [00:42<00:00,  1.90s/it]

{'eval_loss': 0.8116428852081299, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.25, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.922, 'eval_steps_per_second': 11.922, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.10s/it]


{'train_runtime': 44.1453, 'train_samples_per_second': 3.738, 'train_steps_per_second': 0.476, 'train_loss': 0.7932476770310175, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 141.73it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 28015.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 157.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8730126619338989, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.876, 'eval_steps_per_second': 10.876, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.94s/it]

{'eval_loss': 0.8312608003616333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.94s/it]

{'eval_loss': 0.8092043995857239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.007, 'eval_steps_per_second': 13.007, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.09s/it]


{'train_runtime': 43.9774, 'train_samples_per_second': 3.82, 'train_steps_per_second': 0.478, 'train_loss': 0.7933943612234933, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.14it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 161.98it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 27455.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 156.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.8730126619338989, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.96s/it]

{'eval_loss': 0.8312608003616333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:43<00:00,  1.97s/it]

{'eval_loss': 0.8092043995857239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.11s/it]


{'train_runtime': 44.4056, 'train_samples_per_second': 3.783, 'train_steps_per_second': 0.473, 'train_loss': 0.7933943612234933, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 27995.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 154.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 465.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8730126619338989, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.94s/it]

{'eval_loss': 0.8312608003616333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.58, 'eval_steps_per_second': 11.58, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.92s/it]

{'eval_loss': 0.8092043995857239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.09s/it]


{'train_runtime': 43.9837, 'train_samples_per_second': 3.82, 'train_steps_per_second': 0.477, 'train_loss': 0.7933943612234933, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 28008.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 148.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8730126619338989, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.138, 'eval_steps_per_second': 12.138, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.97s/it]

{'eval_loss': 0.8312608003616333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.788, 'eval_steps_per_second': 11.788, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:43<00:00,  1.95s/it]

{'eval_loss': 0.8092043995857239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.11s/it]


{'train_runtime': 44.403, 'train_samples_per_second': 3.784, 'train_steps_per_second': 0.473, 'train_loss': 0.7933943612234933, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 18678.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 155.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8730126619338989, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.789, 'eval_steps_per_second': 11.789, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.93s/it]

{'eval_loss': 0.8312608003616333, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:43<00:00,  1.96s/it]

{'eval_loss': 0.8092043995857239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.12s/it]


{'train_runtime': 44.5402, 'train_samples_per_second': 3.772, 'train_steps_per_second': 0.471, 'train_loss': 0.7933943612234933, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 18749.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 156.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.885355532169342, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.436, 'eval_steps_per_second': 11.436, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.57s/it]

{'eval_loss': 0.7905460596084595, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.694, 'eval_steps_per_second': 12.694, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.58s/it]

{'eval_loss': 0.7758228778839111, 'eval_precision': 0.5, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.36363636363636365, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.4, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.703, 'eval_steps_per_second': 11.703, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.91s/it]


{'train_runtime': 45.9335, 'train_samples_per_second': 3.723, 'train_steps_per_second': 0.522, 'train_loss': 0.7139245669047037, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.10it/s]
100%|██████████| 1/1 [00:00<00:00, 76.37it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 27838.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 946.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 147.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.8067596554756165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.201, 'eval_steps_per_second': 13.201, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.55s/it]

{'eval_loss': 0.7027060985565186, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.831, 'eval_steps_per_second': 11.831, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.56s/it]

{'eval_loss': 0.6921994686126709, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.243, 'eval_steps_per_second': 11.243, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.2101, 'train_samples_per_second': 3.7, 'train_steps_per_second': 0.519, 'train_loss': 0.8014580408732096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.03it/s]
100%|██████████| 1/1 [00:00<00:00, 76.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.51it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 18999.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 155.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8067596554756165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.56s/it]

{'eval_loss': 0.7027060985565186, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.849, 'eval_steps_per_second': 12.849, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.56s/it]

{'eval_loss': 0.6921994686126709, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.5253, 'train_samples_per_second': 3.675, 'train_steps_per_second': 0.516, 'train_loss': 0.8014580408732096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 130.57it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 28457.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 155.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.8067596554756165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.018, 'eval_steps_per_second': 13.018, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.54s/it]

{'eval_loss': 0.7027060985565186, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.774, 'eval_steps_per_second': 11.774, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.55s/it]

{'eval_loss': 0.6921994686126709, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.286, 'eval_steps_per_second': 12.286, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.90s/it]


{'train_runtime': 45.6923, 'train_samples_per_second': 3.742, 'train_steps_per_second': 0.525, 'train_loss': 0.8014580408732096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.01it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 19013.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 152.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.8067596554756165, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.083, 'eval_steps_per_second': 12.083, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:12,  1.58s/it]

{'eval_loss': 0.7027060985565186, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.534, 'eval_steps_per_second': 12.534, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.57s/it]

{'eval_loss': 0.6921994686126709, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.7807, 'train_samples_per_second': 3.655, 'train_steps_per_second': 0.513, 'train_loss': 0.8014580408732096, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 164.45it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 28995.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 154.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.8137281537055969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.59s/it]

{'eval_loss': 0.7100156545639038, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.449, 'eval_steps_per_second': 12.449, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.58s/it]

{'eval_loss': 0.6606497168540955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.115, 'eval_steps_per_second': 11.115, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.2216, 'train_samples_per_second': 3.764, 'train_steps_per_second': 0.519, 'train_loss': 0.7897920608520508, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 61.98it/s]
100%|██████████| 1/1 [00:00<00:00, 164.65it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 19112.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 151.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8137281537055969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.58s/it]

{'eval_loss': 0.7100156545639038, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.261, 'eval_steps_per_second': 12.261, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.58s/it]

{'eval_loss': 0.6606497168540955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.011, 'eval_steps_per_second': 13.011, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.91s/it]


{'train_runtime': 45.9444, 'train_samples_per_second': 3.787, 'train_steps_per_second': 0.522, 'train_loss': 0.7897920608520508, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
100%|██████████| 1/1 [00:00<00:00, 140.94it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 19056.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 925.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 144.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8137281537055969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.58s/it]

{'eval_loss': 0.7100156545639038, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.58s/it]

{'eval_loss': 0.6606497168540955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.12, 'eval_steps_per_second': 12.12, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.91s/it]


{'train_runtime': 45.7944, 'train_samples_per_second': 3.8, 'train_steps_per_second': 0.524, 'train_loss': 0.7897920608520508, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 164.33it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 29019.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 970.23 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 148.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8137281537055969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.057, 'eval_steps_per_second': 12.057, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.60s/it]

{'eval_loss': 0.7100156545639038, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0785, 'eval_samples_per_second': 12.744, 'eval_steps_per_second': 12.744, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.59s/it]

{'eval_loss': 0.6606497168540955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.08, 'eval_samples_per_second': 12.497, 'eval_steps_per_second': 12.497, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.92s/it]


{'train_runtime': 46.0859, 'train_samples_per_second': 3.776, 'train_steps_per_second': 0.521, 'train_loss': 0.7897920608520508, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 29012.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 154.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8137281537055969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.274, 'eval_steps_per_second': 12.274, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.60s/it]

{'eval_loss': 0.7100156545639038, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.098, 'eval_steps_per_second': 13.098, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.62s/it]

{'eval_loss': 0.6606497168540955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.357, 'eval_steps_per_second': 12.357, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.5827, 'train_samples_per_second': 3.735, 'train_steps_per_second': 0.515, 'train_loss': 0.7897920608520508, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 29498.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 151.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.8137067556381226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.421, 'eval_steps_per_second': 11.421, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.65s/it]

{'eval_loss': 0.7057072520256042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.199, 'eval_steps_per_second': 12.199, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.6677505373954773, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.998, 'eval_steps_per_second': 10.998, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.96s/it]


{'train_runtime': 46.9751, 'train_samples_per_second': 3.768, 'train_steps_per_second': 0.511, 'train_loss': 0.7946964104970297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 29505.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 153.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.8137067556381226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.67s/it]

{'eval_loss': 0.7057072520256042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.242, 'eval_steps_per_second': 11.242, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.68s/it]

{'eval_loss': 0.6677505373954773, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.636, 'eval_steps_per_second': 11.636, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.97s/it]


{'train_runtime': 47.179, 'train_samples_per_second': 3.752, 'train_steps_per_second': 0.509, 'train_loss': 0.7946964104970297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 29498.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 149.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8137067556381226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0965, 'eval_samples_per_second': 10.364, 'eval_steps_per_second': 10.364, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.62s/it]

{'eval_loss': 0.7057072520256042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.61s/it]

{'eval_loss': 0.6677505373954773, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.05, 'eval_steps_per_second': 12.05, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.3983, 'train_samples_per_second': 3.815, 'train_steps_per_second': 0.517, 'train_loss': 0.7946964104970297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.93it/s]
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 28618.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 149.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 459.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8137067556381226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.912, 'eval_steps_per_second': 11.912, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:13,  1.63s/it]

{'eval_loss': 0.7057072520256042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.364, 'eval_steps_per_second': 11.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.64s/it]

{'eval_loss': 0.6677505373954773, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.11, 'eval_steps_per_second': 12.11, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.4104, 'train_samples_per_second': 3.814, 'train_steps_per_second': 0.517, 'train_loss': 0.7946964104970297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 19050.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 150.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8137067556381226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0946, 'eval_samples_per_second': 10.576, 'eval_steps_per_second': 10.576, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.62s/it]

{'eval_loss': 0.7057072520256042, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.045, 'eval_steps_per_second': 12.045, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.6677505373954773, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1047, 'eval_samples_per_second': 9.553, 'eval_steps_per_second': 9.553, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.6511, 'train_samples_per_second': 3.794, 'train_steps_per_second': 0.514, 'train_loss': 0.7946964104970297, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 19663.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 137.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8135169744491577, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0961, 'eval_samples_per_second': 10.409, 'eval_steps_per_second': 10.409, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.70s/it]

{'eval_loss': 0.7080159187316895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.71s/it]

{'eval_loss': 0.6703552007675171, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0803, 'eval_samples_per_second': 12.448, 'eval_steps_per_second': 12.448, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.97s/it]


{'train_runtime': 47.2999, 'train_samples_per_second': 3.806, 'train_steps_per_second': 0.507, 'train_loss': 0.7948362032572428, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 20003.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 140.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.8135169744491577, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1062, 'eval_samples_per_second': 9.417, 'eval_steps_per_second': 9.417, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.71s/it]

{'eval_loss': 0.7080159187316895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.849, 'eval_steps_per_second': 12.849, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.70s/it]

{'eval_loss': 0.6703552007675171, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.109, 'eval_samples_per_second': 9.173, 'eval_steps_per_second': 9.173, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.99s/it]


{'train_runtime': 47.6902, 'train_samples_per_second': 3.774, 'train_steps_per_second': 0.503, 'train_loss': 0.7948362032572428, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 20015.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 141.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 237.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8135169744491577, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1352, 'eval_samples_per_second': 7.398, 'eval_steps_per_second': 7.398, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:13,  1.67s/it]

{'eval_loss': 0.7080159187316895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1283, 'eval_samples_per_second': 7.792, 'eval_steps_per_second': 7.792, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.70s/it]

{'eval_loss': 0.6703552007675171, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.756, 'eval_steps_per_second': 10.756, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.96s/it]


{'train_runtime': 46.9627, 'train_samples_per_second': 3.833, 'train_steps_per_second': 0.511, 'train_loss': 0.7948362032572428, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 163.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.88it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 29973.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 142.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8135169744491577, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.71s/it]

{'eval_loss': 0.7080159187316895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.304, 'eval_steps_per_second': 11.304, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.69s/it]

{'eval_loss': 0.6703552007675171, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1305, 'eval_samples_per_second': 7.666, 'eval_steps_per_second': 7.666, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.98s/it]


{'train_runtime': 47.4007, 'train_samples_per_second': 3.797, 'train_steps_per_second': 0.506, 'train_loss': 0.7948362032572428, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 29289.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 140.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8135169744491577, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.203, 'eval_steps_per_second': 12.203, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.71s/it]

{'eval_loss': 0.7080159187316895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.096, 'eval_samples_per_second': 10.413, 'eval_steps_per_second': 10.413, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.72s/it]

{'eval_loss': 0.6703552007675171, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.347, 'eval_steps_per_second': 12.347, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.98s/it]


{'train_runtime': 47.4818, 'train_samples_per_second': 3.791, 'train_steps_per_second': 0.505, 'train_loss': 0.7948362032572428, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
100%|██████████| 1/1 [00:00<00:00, 173.05it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 19972.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 140.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8140155673027039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.902, 'eval_steps_per_second': 11.902, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.76s/it]

{'eval_loss': 0.7088074684143066, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.76s/it]

{'eval_loss': 0.6798253655433655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.00s/it]


{'train_runtime': 48.0101, 'train_samples_per_second': 3.812, 'train_steps_per_second': 0.5, 'train_loss': 0.7898664474487305, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 44.95it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 20329.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.20 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 141.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8140155673027039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.766, 'eval_steps_per_second': 11.766, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.77s/it]

{'eval_loss': 0.7088074684143066, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.517, 'eval_steps_per_second': 12.517, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.77s/it]

{'eval_loss': 0.6798253655433655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.2144, 'train_samples_per_second': 3.796, 'train_steps_per_second': 0.498, 'train_loss': 0.7898664474487305, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 90.21it/s]
100%|██████████| 1/1 [00:00<00:00, 152.37it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 30502.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 131.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8140155673027039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.74s/it]

{'eval_loss': 0.7088074684143066, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.176, 'eval_steps_per_second': 13.176, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.75s/it]

{'eval_loss': 0.6798253655433655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.99s/it]


{'train_runtime': 47.7074, 'train_samples_per_second': 3.836, 'train_steps_per_second': 0.503, 'train_loss': 0.7898664474487305, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.07it/s]
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 29910.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 141.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 391.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8140155673027039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.77s/it]

{'eval_loss': 0.7088074684143066, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.77s/it]

{'eval_loss': 0.6798253655433655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.1826, 'train_samples_per_second': 3.798, 'train_steps_per_second': 0.498, 'train_loss': 0.7898664474487305, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 20342.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 138.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8140155673027039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.899, 'eval_steps_per_second': 11.899, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.75s/it]

{'eval_loss': 0.7088074684143066, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.728, 'eval_steps_per_second': 11.728, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.75s/it]

{'eval_loss': 0.6798253655433655, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.99s/it]


{'train_runtime': 47.6798, 'train_samples_per_second': 3.838, 'train_steps_per_second': 0.503, 'train_loss': 0.7898664474487305, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 123.65it/s]
100%|██████████| 1/1 [00:00<00:00, 164.42it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 31017.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 137.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8149617910385132, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.847, 'eval_steps_per_second': 12.847, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.80s/it]

{'eval_loss': 0.7077144384384155, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.359, 'eval_steps_per_second': 12.359, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.81s/it]

{'eval_loss': 0.6637244820594788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.193, 'eval_steps_per_second': 13.193, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.3074, 'train_samples_per_second': 3.85, 'train_steps_per_second': 0.497, 'train_loss': 0.7887651920318604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 30994.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 137.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 487.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8149617910385132, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.80s/it]

{'eval_loss': 0.7077144384384155, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.194, 'eval_steps_per_second': 13.194, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.80s/it]

{'eval_loss': 0.6637244820594788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.00s/it]


{'train_runtime': 48.0478, 'train_samples_per_second': 3.871, 'train_steps_per_second': 0.5, 'train_loss': 0.7887651920318604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.95it/s]
100%|██████████| 1/1 [00:00<00:00, 61.93it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 20658.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 138.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8149617910385132, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0797, 'eval_samples_per_second': 12.541, 'eval_steps_per_second': 12.541, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.82s/it]

{'eval_loss': 0.7077144384384155, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.038, 'eval_steps_per_second': 11.038, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.82s/it]

{'eval_loss': 0.6637244820594788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.016, 'eval_steps_per_second': 13.016, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.6151, 'train_samples_per_second': 3.826, 'train_steps_per_second': 0.494, 'train_loss': 0.7887651920318604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
100%|██████████| 1/1 [00:00<00:00, 123.80it/s]
100%|██████████| 1/1 [00:00<00:00, 162.55it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 31005.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 135.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.8149617910385132, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.83s/it]

{'eval_loss': 0.7077144384384155, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.839, 'eval_steps_per_second': 12.839, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.82s/it]

{'eval_loss': 0.6637244820594788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.5035, 'train_samples_per_second': 3.835, 'train_steps_per_second': 0.495, 'train_loss': 0.7887651920318604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 98.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 30998.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 136.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.8149617910385132, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.353, 'eval_steps_per_second': 12.353, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.81s/it]

{'eval_loss': 0.7077144384384155, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.79, 'eval_steps_per_second': 11.79, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.82s/it]

{'eval_loss': 0.6637244820594788, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.829, 'eval_steps_per_second': 12.829, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.6274, 'train_samples_per_second': 3.825, 'train_steps_per_second': 0.494, 'train_loss': 0.7887651920318604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 20716.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 130.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 487.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8143603205680847, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.327, 'eval_steps_per_second': 12.327, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.86s/it]

{'eval_loss': 0.7038837671279907, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.186, 'eval_steps_per_second': 13.186, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.86s/it]

{'eval_loss': 0.6637706160545349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0955, 'eval_samples_per_second': 10.472, 'eval_steps_per_second': 10.472, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.04s/it]


{'train_runtime': 48.8469, 'train_samples_per_second': 3.869, 'train_steps_per_second': 0.491, 'train_loss': 0.7896996339162191, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.83it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 31506.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 129.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8143603205680847, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.788, 'eval_steps_per_second': 11.788, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.85s/it]

{'eval_loss': 0.7038837671279907, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.918, 'eval_steps_per_second': 11.918, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.85s/it]

{'eval_loss': 0.6637706160545349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.858, 'eval_steps_per_second': 12.858, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.6749, 'train_samples_per_second': 3.883, 'train_steps_per_second': 0.493, 'train_loss': 0.7896996339162191, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 21006.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 131.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8143603205680847, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.91s/it]

{'eval_loss': 0.7038837671279907, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.439, 'eval_steps_per_second': 12.439, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.90s/it]

{'eval_loss': 0.6637706160545349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.07s/it]


{'train_runtime': 49.7179, 'train_samples_per_second': 3.801, 'train_steps_per_second': 0.483, 'train_loss': 0.7896996339162191, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 20687.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 131.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8143603205680847, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.493, 'eval_steps_per_second': 11.493, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.88s/it]

{'eval_loss': 0.7038837671279907, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.88s/it]

{'eval_loss': 0.6637706160545349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.05s/it]


{'train_runtime': 49.2826, 'train_samples_per_second': 3.835, 'train_steps_per_second': 0.487, 'train_loss': 0.7896996339162191, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 21009.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 132.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8143603205680847, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.245, 'eval_steps_per_second': 11.245, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.90s/it]

{'eval_loss': 0.7038837671279907, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.751, 'eval_steps_per_second': 10.751, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.89s/it]

{'eval_loss': 0.6637706160545349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.922, 'eval_steps_per_second': 11.922, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.06s/it]


{'train_runtime': 49.4319, 'train_samples_per_second': 3.823, 'train_steps_per_second': 0.486, 'train_loss': 0.7896996339162191, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.01it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 21348.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 132.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8142004013061523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0938, 'eval_samples_per_second': 10.666, 'eval_steps_per_second': 10.666, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.91s/it]

{'eval_loss': 0.703582763671875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.204, 'eval_steps_per_second': 12.204, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.91s/it]

{'eval_loss': 0.6558564901351929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.783, 'eval_steps_per_second': 11.783, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.07s/it]


{'train_runtime': 49.5699, 'train_samples_per_second': 3.873, 'train_steps_per_second': 0.484, 'train_loss': 0.7984340190887451, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.37it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 32013.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 131.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8142004013061523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.247, 'eval_steps_per_second': 12.247, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:32<00:15,  1.97s/it]

{'eval_loss': 0.703582763671875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:49<00:00,  1.97s/it]

{'eval_loss': 0.6558564901351929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.529, 'eval_steps_per_second': 12.529, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.11s/it]


{'train_runtime': 50.5454, 'train_samples_per_second': 3.799, 'train_steps_per_second': 0.475, 'train_loss': 0.7984340190887451, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 20937.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 129.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8142004013061523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.93s/it]

{'eval_loss': 0.703582763671875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.506, 'eval_steps_per_second': 11.506, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.99s/it]

{'eval_loss': 0.6558564901351929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.08s/it]


{'train_runtime': 50.033, 'train_samples_per_second': 3.837, 'train_steps_per_second': 0.48, 'train_loss': 0.7984340190887451, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
100%|██████████| 1/1 [00:00<00:00, 139.71it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 21319.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 127.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8142004013061523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.87s/it]

{'eval_loss': 0.703582763671875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.88s/it]

{'eval_loss': 0.6558564901351929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.04s/it]


{'train_runtime': 48.9337, 'train_samples_per_second': 3.924, 'train_steps_per_second': 0.49, 'train_loss': 0.7984340190887451, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.01it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 21029.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 130.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8142004013061523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.79, 'eval_steps_per_second': 11.79, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.87s/it]

{'eval_loss': 0.703582763671875, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.279, 'eval_steps_per_second': 12.279, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.86s/it]

{'eval_loss': 0.6558564901351929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.746, 'eval_steps_per_second': 11.746, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.8293, 'train_samples_per_second': 3.932, 'train_steps_per_second': 0.492, 'train_loss': 0.7984340190887451, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21372.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 466.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 125.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6942602396011353, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:13,  1.52s/it]

{'eval_loss': 0.6612875461578369, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.587, 'eval_steps_per_second': 11.587, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.58s/it]

{'eval_loss': 0.6322593688964844, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0923, 'eval_samples_per_second': 10.831, 'eval_steps_per_second': 10.831, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.93s/it]


{'train_runtime': 52.1447, 'train_samples_per_second': 3.74, 'train_steps_per_second': 0.518, 'train_loss': 0.7232139022262009, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.58it/s]
100%|██████████| 1/1 [00:00<00:00, 82.48it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 32513.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 120.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.8672357201576233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.793, 'eval_steps_per_second': 11.793, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.54s/it]

{'eval_loss': 0.7206873893737793, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.858, 'eval_steps_per_second': 10.858, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.54s/it]

{'eval_loss': 0.7154666185379028, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.437, 'eval_steps_per_second': 11.437, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


{'train_runtime': 51.3314, 'train_samples_per_second': 3.799, 'train_steps_per_second': 0.526, 'train_loss': 0.5882577542905454, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 71.12it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21397.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 127.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.8672357201576233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.52s/it]

{'eval_loss': 0.7206873893737793, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.377, 'eval_steps_per_second': 12.377, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.52s/it]

{'eval_loss': 0.7154666185379028, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.445, 'eval_steps_per_second': 11.445, 'epoch': 3.0}


100%|██████████| 27/27 [00:50<00:00,  1.88s/it]


{'train_runtime': 50.6899, 'train_samples_per_second': 3.847, 'train_steps_per_second': 0.533, 'train_loss': 0.5882577542905454, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21392.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 120.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8672357201576233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.50s/it]

{'eval_loss': 0.7206873893737793, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.935, 'eval_steps_per_second': 11.935, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.54s/it]

{'eval_loss': 0.7154666185379028, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.325, 'eval_steps_per_second': 12.325, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


{'train_runtime': 51.3835, 'train_samples_per_second': 3.795, 'train_steps_per_second': 0.525, 'train_loss': 0.5882577542905454, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21657.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 125.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8672357201576233, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.01, 'eval_steps_per_second': 13.01, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.57s/it]

{'eval_loss': 0.7206873893737793, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.005, 'eval_steps_per_second': 13.005, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.56s/it]

{'eval_loss': 0.7154666185379028, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.698, 'eval_steps_per_second': 11.698, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


{'train_runtime': 52.2916, 'train_samples_per_second': 3.729, 'train_steps_per_second': 0.516, 'train_loss': 0.5882577542905454, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 21474.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 122.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 311.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8658685684204102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.29, 'eval_steps_per_second': 12.29, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.56s/it]

{'eval_loss': 0.7196649312973022, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.56s/it]

{'eval_loss': 0.712068498134613, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.907, 'eval_steps_per_second': 11.907, 'epoch': 3.0}


100%|██████████| 27/27 [00:50<00:00,  1.88s/it]


{'train_runtime': 50.7508, 'train_samples_per_second': 3.901, 'train_steps_per_second': 0.532, 'train_loss': 0.586324727093732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.82it/s]
100%|██████████| 1/1 [00:00<00:00, 165.33it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 33002.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 932.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 125.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8658685684204102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.57s/it]

{'eval_loss': 0.7196649312973022, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0746, 'eval_samples_per_second': 13.411, 'eval_steps_per_second': 13.411, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.62s/it]

{'eval_loss': 0.712068498134613, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.197, 'eval_steps_per_second': 12.197, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.92s/it]


{'train_runtime': 51.787, 'train_samples_per_second': 3.823, 'train_steps_per_second': 0.521, 'train_loss': 0.586324727093732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 21982.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.33 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 124.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8658685684204102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.7196649312973022, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.712068498134613, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.92s/it]


{'train_runtime': 51.8141, 'train_samples_per_second': 3.821, 'train_steps_per_second': 0.521, 'train_loss': 0.586324727093732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.14it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 21998.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 947.22 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 124.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8658685684204102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.971, 'eval_steps_per_second': 11.971, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.57s/it]

{'eval_loss': 0.7196649312973022, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.364, 'eval_steps_per_second': 13.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.60s/it]

{'eval_loss': 0.712068498134613, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.201, 'eval_steps_per_second': 12.201, 'epoch': 3.0}


100%|██████████| 27/27 [00:50<00:00,  1.89s/it]


{'train_runtime': 50.9404, 'train_samples_per_second': 3.887, 'train_steps_per_second': 0.53, 'train_loss': 0.586324727093732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 32955.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 124.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8658685684204102, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.194, 'eval_steps_per_second': 13.194, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.59s/it]

{'eval_loss': 0.7196649312973022, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.914, 'eval_steps_per_second': 11.914, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.59s/it]

{'eval_loss': 0.712068498134613, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.93s/it]


{'train_runtime': 52.1699, 'train_samples_per_second': 3.795, 'train_steps_per_second': 0.518, 'train_loss': 0.586324727093732, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 49.60it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 32547.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 120.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8644143342971802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.897, 'eval_steps_per_second': 11.897, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.68s/it]

{'eval_loss': 0.7198822498321533, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.242, 'eval_steps_per_second': 11.242, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.63s/it]

{'eval_loss': 0.7120697498321533, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1616, 'eval_samples_per_second': 6.188, 'eval_steps_per_second': 6.188, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.8834, 'train_samples_per_second': 3.801, 'train_steps_per_second': 0.511, 'train_loss': 0.5857664037633825, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 164.51it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 22324.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 486.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 121.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 279.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8644143342971802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.669, 'eval_steps_per_second': 12.669, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.62s/it]

{'eval_loss': 0.7198822498321533, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.904, 'eval_steps_per_second': 11.904, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.7120697498321533, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.24, 'eval_steps_per_second': 11.24, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.4718, 'train_samples_per_second': 3.905, 'train_steps_per_second': 0.525, 'train_loss': 0.5857664037633825, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.11it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 21976.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 119.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8644143342971802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.59, 'eval_steps_per_second': 11.59, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.66s/it]

{'eval_loss': 0.7198822498321533, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.681, 'eval_steps_per_second': 12.681, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.65s/it]

{'eval_loss': 0.7120697498321533, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.64, 'eval_steps_per_second': 11.64, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.8519, 'train_samples_per_second': 3.803, 'train_steps_per_second': 0.511, 'train_loss': 0.5857664037633825, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.95it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 32726.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 121.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8644143342971802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.61s/it]

{'eval_loss': 0.7198822498321533, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.291, 'eval_steps_per_second': 12.291, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.60s/it]

{'eval_loss': 0.7120697498321533, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.127, 'eval_steps_per_second': 12.127, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.6256, 'train_samples_per_second': 3.893, 'train_steps_per_second': 0.523, 'train_loss': 0.5857664037633825, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 22078.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 123.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8644143342971802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.517, 'eval_steps_per_second': 11.517, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.64s/it]

{'eval_loss': 0.7198822498321533, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.69, 'eval_steps_per_second': 12.69, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.65s/it]

{'eval_loss': 0.7120697498321533, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.252, 'eval_steps_per_second': 11.252, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.5731, 'train_samples_per_second': 3.823, 'train_steps_per_second': 0.514, 'train_loss': 0.5857664037633825, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.39it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.00it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 33990.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 873.27 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 108.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8635165691375732, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1247, 'eval_samples_per_second': 8.02, 'eval_steps_per_second': 8.02, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.71s/it]

{'eval_loss': 0.7198538184165955, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.65s/it]

{'eval_loss': 0.7122639417648315, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1405, 'eval_samples_per_second': 7.119, 'eval_steps_per_second': 7.119, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.9088, 'train_samples_per_second': 3.856, 'train_steps_per_second': 0.51, 'train_loss': 0.5853333649811922, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 22664.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 119.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8635165691375732, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.67s/it]

{'eval_loss': 0.7198538184165955, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0783, 'eval_samples_per_second': 12.777, 'eval_steps_per_second': 12.777, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.72s/it]

{'eval_loss': 0.7122639417648315, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.23, 'eval_steps_per_second': 12.23, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.1054, 'train_samples_per_second': 3.841, 'train_steps_per_second': 0.508, 'train_loss': 0.5853333649811922, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 143.00it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 22287.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 117.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8635165691375732, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0938, 'eval_samples_per_second': 10.667, 'eval_steps_per_second': 10.667, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.69s/it]

{'eval_loss': 0.7198538184165955, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1049, 'eval_samples_per_second': 9.534, 'eval_steps_per_second': 9.534, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.65s/it]

{'eval_loss': 0.7122639417648315, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1392, 'eval_samples_per_second': 7.186, 'eval_steps_per_second': 7.186, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.7896, 'train_samples_per_second': 3.864, 'train_steps_per_second': 0.511, 'train_loss': 0.5853333649811922, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 34014.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 119.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8635165691375732, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.67s/it]

{'eval_loss': 0.7198538184165955, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.73s/it]

{'eval_loss': 0.7122639417648315, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.196, 'eval_steps_per_second': 13.196, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.9459, 'train_samples_per_second': 3.853, 'train_steps_per_second': 0.51, 'train_loss': 0.5853333649811922, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 22670.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 123.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8635165691375732, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.1303, 'eval_samples_per_second': 7.672, 'eval_steps_per_second': 7.672, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.70s/it]

{'eval_loss': 0.7198538184165955, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.69s/it]

{'eval_loss': 0.7122639417648315, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1303, 'eval_samples_per_second': 7.675, 'eval_steps_per_second': 7.675, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.1524, 'train_samples_per_second': 3.838, 'train_steps_per_second': 0.508, 'train_loss': 0.5853333649811922, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 23014.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 122.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8634104132652283, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.77s/it]

{'eval_loss': 0.7196618318557739, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.75s/it]

{'eval_loss': 0.7095020413398743, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.3086, 'train_samples_per_second': 3.883, 'train_steps_per_second': 0.506, 'train_loss': 0.5849795164885344, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 22999.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 866.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 121.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8634104132652283, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.0, 'eval_steps_per_second': 13.0, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.76s/it]

{'eval_loss': 0.7196618318557739, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.13, 'eval_steps_per_second': 12.13, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.76s/it]

{'eval_loss': 0.7095020413398743, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.44, 'eval_steps_per_second': 12.44, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.4293, 'train_samples_per_second': 3.874, 'train_steps_per_second': 0.505, 'train_loss': 0.5849795164885344, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 22654.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 115.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8634104132652283, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.72s/it]

{'eval_loss': 0.7196618318557739, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.73s/it]

{'eval_loss': 0.7095020413398743, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.5967, 'train_samples_per_second': 3.936, 'train_steps_per_second': 0.513, 'train_loss': 0.5849795164885344, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 34510.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 117.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8634104132652283, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.374, 'eval_steps_per_second': 12.374, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.76s/it]

{'eval_loss': 0.7196618318557739, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.513, 'eval_steps_per_second': 11.513, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.76s/it]

{'eval_loss': 0.7095020413398743, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.374, 'eval_steps_per_second': 12.374, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.7676, 'train_samples_per_second': 3.85, 'train_steps_per_second': 0.502, 'train_loss': 0.5849795164885344, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.94it/s]
100%|██████████| 1/1 [00:00<00:00, 164.65it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 22990.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.43 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 122.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8634104132652283, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.74s/it]

{'eval_loss': 0.7196618318557739, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.738, 'eval_steps_per_second': 10.738, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.77s/it]

{'eval_loss': 0.7095020413398743, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.359, 'eval_steps_per_second': 12.359, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.8056, 'train_samples_per_second': 3.847, 'train_steps_per_second': 0.502, 'train_loss': 0.5849795164885344, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 23314.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 122.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8631302118301392, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.377, 'eval_steps_per_second': 13.377, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.80s/it]

{'eval_loss': 0.7211480736732483, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.82s/it]

{'eval_loss': 0.7089793682098389, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.507, 'eval_steps_per_second': 11.507, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.2593, 'train_samples_per_second': 3.87, 'train_steps_per_second': 0.498, 'train_loss': 0.584691012347186, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 34969.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 122.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8631302118301392, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.634, 'eval_steps_per_second': 11.634, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:15,  1.76s/it]

{'eval_loss': 0.7211480736732483, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.857, 'eval_steps_per_second': 12.857, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.78s/it]

{'eval_loss': 0.7089793682098389, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.97s/it]


{'train_runtime': 53.0605, 'train_samples_per_second': 3.958, 'train_steps_per_second': 0.509, 'train_loss': 0.584691012347186, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
100%|██████████| 1/1 [00:00<00:00, 165.34it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 23372.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 121.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8631302118301392, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.185, 'eval_steps_per_second': 13.185, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.81s/it]

{'eval_loss': 0.7211480736732483, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.698, 'eval_steps_per_second': 12.698, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.81s/it]

{'eval_loss': 0.7089793682098389, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.3272, 'train_samples_per_second': 3.865, 'train_steps_per_second': 0.497, 'train_loss': 0.584691012347186, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 23329.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 118.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8631302118301392, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.78s/it]

{'eval_loss': 0.7211480736732483, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.83s/it]

{'eval_loss': 0.7089793682098389, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0751, 'eval_samples_per_second': 13.319, 'eval_steps_per_second': 13.319, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.2592, 'train_samples_per_second': 3.87, 'train_steps_per_second': 0.498, 'train_loss': 0.584691012347186, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 23335.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 113.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.8631302118301392, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.846, 'eval_steps_per_second': 12.846, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.80s/it]

{'eval_loss': 0.7211480736732483, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.186, 'eval_steps_per_second': 13.186, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.81s/it]

{'eval_loss': 0.7089793682098389, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.011, 'eval_steps_per_second': 13.011, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.1595, 'train_samples_per_second': 3.877, 'train_steps_per_second': 0.499, 'train_loss': 0.584691012347186, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.72it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.52it/s]
100%|██████████| 1/1 [00:00<00:00, 165.15it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 23424.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 118.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.8621225953102112, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0917, 'eval_samples_per_second': 10.906, 'eval_steps_per_second': 10.906, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.87s/it]

{'eval_loss': 0.7219140529632568, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.968, 'eval_steps_per_second': 11.968, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.86s/it]

{'eval_loss': 0.7096645832061768, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.307, 'eval_steps_per_second': 12.307, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.7449, 'train_samples_per_second': 3.891, 'train_steps_per_second': 0.493, 'train_loss': 0.5844772126939561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 65.92it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.33it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 35498.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 940.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 624.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 120.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8621225953102112, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.61, 'eval_steps_per_second': 11.61, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.85s/it]

{'eval_loss': 0.7219140529632568, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.86s/it]

{'eval_loss': 0.7096645832061768, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.287, 'eval_steps_per_second': 12.287, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


{'train_runtime': 54.5562, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.495, 'train_loss': 0.5844772126939561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 122.72it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 23683.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.39 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 117.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8621225953102112, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.246, 'eval_steps_per_second': 11.246, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.82s/it]

{'eval_loss': 0.7219140529632568, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.512, 'eval_steps_per_second': 11.512, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.89s/it]

{'eval_loss': 0.7096645832061768, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.115, 'eval_samples_per_second': 8.694, 'eval_steps_per_second': 8.694, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.1215, 'train_samples_per_second': 3.864, 'train_steps_per_second': 0.49, 'train_loss': 0.5844772126939561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 23351.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 117.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8621225953102112, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.698, 'eval_steps_per_second': 11.698, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.86s/it]

{'eval_loss': 0.7219140529632568, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.81s/it]

{'eval_loss': 0.7096645832061768, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.791, 'eval_steps_per_second': 11.791, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.1641, 'train_samples_per_second': 3.932, 'train_steps_per_second': 0.498, 'train_loss': 0.5844772126939561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.96it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 23658.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 118.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8621225953102112, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.89s/it]

{'eval_loss': 0.7219140529632568, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.89s/it]

{'eval_loss': 0.7096645832061768, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.707, 'eval_steps_per_second': 11.707, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.0, 'train_samples_per_second': 3.873, 'train_steps_per_second': 0.491, 'train_loss': 0.5844772126939561, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 22829.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 118.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8613460659980774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.83, 'eval_steps_per_second': 12.83, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.85s/it]

{'eval_loss': 0.721423864364624, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.85s/it]

{'eval_loss': 0.7085049152374268, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.792, 'eval_steps_per_second': 11.792, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.1453, 'train_samples_per_second': 3.989, 'train_steps_per_second': 0.499, 'train_loss': 0.5841841167873807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.35it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 24007.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 117.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8613460659980774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.366, 'eval_steps_per_second': 13.366, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.93s/it]

{'eval_loss': 0.721423864364624, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.94s/it]

{'eval_loss': 0.7085049152374268, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


{'train_runtime': 55.3423, 'train_samples_per_second': 3.903, 'train_steps_per_second': 0.488, 'train_loss': 0.5841841167873807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 176.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.20it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 23988.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 114.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8613460659980774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.86s/it]

{'eval_loss': 0.721423864364624, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.419, 'eval_steps_per_second': 12.419, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.85s/it]

{'eval_loss': 0.7085049152374268, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.857, 'eval_steps_per_second': 11.857, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.2318, 'train_samples_per_second': 3.983, 'train_steps_per_second': 0.498, 'train_loss': 0.5841841167873807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 165.30it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 23999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.17 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 117.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8613460659980774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.92s/it]

{'eval_loss': 0.721423864364624, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.596, 'eval_steps_per_second': 11.596, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.94s/it]

{'eval_loss': 0.7085049152374268, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.918, 'eval_steps_per_second': 11.918, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.2085, 'train_samples_per_second': 3.912, 'train_steps_per_second': 0.489, 'train_loss': 0.5841841167873807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 23622.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 117.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8613460659980774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.87s/it]

{'eval_loss': 0.721423864364624, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.91s/it]

{'eval_loss': 0.7085049152374268, 'eval_precision': 0.25, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.18181818181818182, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.757, 'eval_steps_per_second': 11.757, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.8423, 'train_samples_per_second': 3.939, 'train_steps_per_second': 0.492, 'train_loss': 0.5841841167873807, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 151.63it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 24319.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 111.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8398906588554382, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.235, 'eval_steps_per_second': 11.235, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.51s/it]

{'eval_loss': 0.7155788540840149, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.427, 'eval_steps_per_second': 11.427, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:55<00:00,  1.53s/it]

{'eval_loss': 0.6704784631729126, 'eval_precision': 0.3333333333333333, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.2, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.90s/it]


{'train_runtime': 57.0442, 'train_samples_per_second': 3.839, 'train_steps_per_second': 0.526, 'train_loss': 0.5288530985514323, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 35677.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 114.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7118138670921326, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.193, 'eval_steps_per_second': 13.193, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:15,  1.54s/it]
 67%|██████▋   | 20/30 [00:37<00:15,  1.54s/it]

{'eval_loss': 0.6788421869277954, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.256, 'eval_steps_per_second': 11.256, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.55s/it]
100%|██████████| 30/30 [00:56<00:00,  1.55s/it]

{'eval_loss': 0.65254807472229, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.632, 'eval_steps_per_second': 10.632, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.6325, 'train_samples_per_second': 3.8, 'train_steps_per_second': 0.521, 'train_loss': 0.6597504297892253, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 76.14it/s]
100%|██████████| 1/1 [00:00<00:00, 76.38it/s]
100%|██████████| 1/1 [00:00<00:00, 165.22it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 24348.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 111.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _

{'eval_loss': 0.7118138670921326, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.52s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.52s/it]

{'eval_loss': 0.6788421869277954, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.016, 'eval_steps_per_second': 13.016, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.57s/it]
100%|██████████| 30/30 [00:56<00:00,  1.57s/it]

{'eval_loss': 0.65254807472229, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.936, 'eval_steps_per_second': 11.936, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.0853, 'train_samples_per_second': 3.77, 'train_steps_per_second': 0.516, 'train_loss': 0.6597504297892253, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 24309.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.25 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 114.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _w

{'eval_loss': 0.7118138670921326, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.95, 'eval_steps_per_second': 11.95, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:14,  1.50s/it]
 67%|██████▋   | 20/30 [00:36<00:14,  1.50s/it]

{'eval_loss': 0.6788421869277954, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 2.0}


100%|██████████| 30/30 [00:54<00:00,  1.51s/it]
100%|██████████| 30/30 [00:54<00:00,  1.51s/it]

{'eval_loss': 0.65254807472229, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.384, 'eval_steps_per_second': 11.384, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.88s/it]


{'train_runtime': 56.3805, 'train_samples_per_second': 3.884, 'train_steps_per_second': 0.532, 'train_loss': 0.6597504297892253, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 24038.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 112.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _

{'eval_loss': 0.7118138670921326, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.839, 'eval_steps_per_second': 12.839, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:15,  1.56s/it]
 67%|██████▋   | 20/30 [00:37<00:15,  1.56s/it]

{'eval_loss': 0.6788421869277954, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.157, 'eval_steps_per_second': 11.157, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.51s/it]
100%|██████████| 30/30 [00:55<00:00,  1.51s/it]

{'eval_loss': 0.65254807472229, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.281, 'eval_steps_per_second': 12.281, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.2323, 'train_samples_per_second': 3.827, 'train_steps_per_second': 0.524, 'train_loss': 0.6597504297892253, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 140.96it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 24374.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 108.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  

{'eval_loss': 0.7124155163764954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.57s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.57s/it]

{'eval_loss': 0.6794328093528748, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.264, 'eval_steps_per_second': 11.264, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.58s/it]
100%|██████████| 30/30 [00:55<00:00,  1.58s/it]

{'eval_loss': 0.6527020931243896, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.848, 'eval_steps_per_second': 11.848, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.90s/it]


{'train_runtime': 56.9492, 'train_samples_per_second': 3.898, 'train_steps_per_second': 0.527, 'train_loss': 0.6578942616780599, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 70.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 24680.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 112.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 198.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7124155163764954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.53s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.53s/it]

{'eval_loss': 0.6794328093528748, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.187, 'eval_steps_per_second': 13.187, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.54s/it]
100%|██████████| 30/30 [00:55<00:00,  1.54s/it]

{'eval_loss': 0.6527020931243896, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.88s/it]


{'train_runtime': 56.5194, 'train_samples_per_second': 3.928, 'train_steps_per_second': 0.531, 'train_loss': 0.6578942616780599, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 24288.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 113.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 369.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7124155163764954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.69, 'eval_steps_per_second': 12.69, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.54s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.54s/it]

{'eval_loss': 0.6794328093528748, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 2.0}


100%|██████████| 30/30 [00:54<00:00,  1.52s/it]
100%|██████████| 30/30 [00:54<00:00,  1.52s/it]

{'eval_loss': 0.6527020931243896, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.88s/it]


{'train_runtime': 56.2765, 'train_samples_per_second': 3.945, 'train_steps_per_second': 0.533, 'train_loss': 0.6578942616780599, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.94it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 36980.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.66 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 112.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7124155163764954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.891, 'eval_steps_per_second': 11.891, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.58s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.58s/it]

{'eval_loss': 0.6794328093528748, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.58s/it]
100%|██████████| 30/30 [00:56<00:00,  1.58s/it]

{'eval_loss': 0.6527020931243896, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.936, 'eval_steps_per_second': 11.936, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.4281, 'train_samples_per_second': 3.866, 'train_steps_per_second': 0.522, 'train_loss': 0.6578942616780599, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 165.13it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 24678.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.54 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 112.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7124155163764954, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.185, 'eval_steps_per_second': 13.185, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.53s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.53s/it]

{'eval_loss': 0.6794328093528748, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.56s/it]
100%|██████████| 30/30 [00:55<00:00,  1.56s/it]

{'eval_loss': 0.6527020931243896, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.842, 'eval_steps_per_second': 11.842, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.89s/it]


{'train_runtime': 56.6318, 'train_samples_per_second': 3.92, 'train_steps_per_second': 0.53, 'train_loss': 0.6578942616780599, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.34it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 140.72it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 24572.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 110.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7123205661773682, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.837, 'eval_steps_per_second': 11.837, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.59s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.59s/it]

{'eval_loss': 0.6798526048660278, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.915, 'eval_steps_per_second': 11.915, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.59s/it]
100%|██████████| 30/30 [00:55<00:00,  1.59s/it]

{'eval_loss': 0.6525657176971436, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.89s/it]


{'train_runtime': 56.7681, 'train_samples_per_second': 3.963, 'train_steps_per_second': 0.528, 'train_loss': 0.65706361134847, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 123.88it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.17it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 25007.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 111.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7123205661773682, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.044, 'eval_steps_per_second': 12.044, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]
 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]

{'eval_loss': 0.6798526048660278, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.502, 'eval_steps_per_second': 11.502, 'epoch': 2.0}


100%|██████████| 30/30 [00:55<00:00,  1.59s/it]
100%|██████████| 30/30 [00:56<00:00,  1.59s/it]

{'eval_loss': 0.6525657176971436, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.983, 'eval_steps_per_second': 11.983, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.4754, 'train_samples_per_second': 3.915, 'train_steps_per_second': 0.522, 'train_loss': 0.65706361134847, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.26it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 24438.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 110.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7123205661773682, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.967, 'eval_steps_per_second': 11.967, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]
 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]

{'eval_loss': 0.6798526048660278, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.1232, 'eval_samples_per_second': 8.118, 'eval_steps_per_second': 8.118, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.64s/it]
100%|██████████| 30/30 [00:56<00:00,  1.64s/it]

{'eval_loss': 0.6525657176971436, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.841, 'eval_steps_per_second': 12.841, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.1833, 'train_samples_per_second': 3.867, 'train_steps_per_second': 0.516, 'train_loss': 0.65706361134847, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 25001.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 111.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7123205661773682, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:36<00:15,  1.60s/it]
 67%|██████▋   | 20/30 [00:36<00:15,  1.60s/it]

{'eval_loss': 0.6798526048660278, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.365, 'eval_steps_per_second': 11.365, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.64s/it]
100%|██████████| 30/30 [00:56<00:00,  1.64s/it]

{'eval_loss': 0.6525657176971436, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.27, 'eval_steps_per_second': 12.27, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.6253, 'train_samples_per_second': 3.905, 'train_steps_per_second': 0.521, 'train_loss': 0.65706361134847, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 162.92it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.53it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 24593.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 110.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7123205661773682, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.922, 'eval_steps_per_second': 11.922, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:16,  1.62s/it]
 67%|██████▋   | 20/30 [00:37<00:16,  1.62s/it]

{'eval_loss': 0.6798526048660278, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0907, 'eval_samples_per_second': 11.027, 'eval_steps_per_second': 11.027, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.68s/it]
100%|██████████| 30/30 [00:56<00:00,  1.68s/it]

{'eval_loss': 0.6525657176971436, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.623, 'eval_steps_per_second': 11.623, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.2951, 'train_samples_per_second': 3.86, 'train_steps_per_second': 0.515, 'train_loss': 0.65706361134847, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.31it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.48it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 37143.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 110.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7122890949249268, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1207, 'eval_samples_per_second': 8.287, 'eval_steps_per_second': 8.287, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:17,  1.70s/it]
 67%|██████▋   | 20/30 [00:37<00:17,  1.70s/it]

{'eval_loss': 0.6793912053108215, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.556, 'eval_steps_per_second': 11.556, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.64s/it]
100%|██████████| 30/30 [00:56<00:00,  1.64s/it]

{'eval_loss': 0.6518060564994812, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.1297, 'eval_samples_per_second': 7.709, 'eval_steps_per_second': 7.709, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.094, 'train_samples_per_second': 3.925, 'train_steps_per_second': 0.516, 'train_loss': 0.6566094080607097, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 140.84it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 37723.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 108.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7122890949249268, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1437, 'eval_samples_per_second': 6.957, 'eval_steps_per_second': 6.957, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:17,  1.73s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.73s/it]

{'eval_loss': 0.6793912053108215, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.71s/it]
100%|██████████| 30/30 [00:58<00:00,  1.71s/it]

{'eval_loss': 0.6518060564994812, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.877, 'eval_steps_per_second': 10.877, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  2.00s/it]


{'train_runtime': 59.9358, 'train_samples_per_second': 3.804, 'train_steps_per_second': 0.501, 'train_loss': 0.6566094080607097, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 140.88it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 25317.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.95 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 107.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 319.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7122890949249268, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1323, 'eval_samples_per_second': 7.561, 'eval_steps_per_second': 7.561, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:17,  1.70s/it]
 67%|██████▋   | 20/30 [00:37<00:17,  1.70s/it]

{'eval_loss': 0.6793912053108215, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0915, 'eval_samples_per_second': 10.934, 'eval_steps_per_second': 10.934, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.67s/it]
100%|██████████| 30/30 [00:57<00:00,  1.67s/it]

{'eval_loss': 0.6518060564994812, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.1343, 'eval_samples_per_second': 7.446, 'eval_steps_per_second': 7.446, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.7421, 'train_samples_per_second': 3.881, 'train_steps_per_second': 0.511, 'train_loss': 0.6566094080607097, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 24905.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 107.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7122890949249268, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.272, 'eval_steps_per_second': 12.272, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:16,  1.65s/it]
 67%|██████▋   | 20/30 [00:37<00:16,  1.65s/it]

{'eval_loss': 0.6793912053108215, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.1031, 'eval_samples_per_second': 9.697, 'eval_steps_per_second': 9.697, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.72s/it]
100%|██████████| 30/30 [00:57<00:00,  1.72s/it]

{'eval_loss': 0.6518060564994812, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.377, 'eval_steps_per_second': 12.377, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.4085, 'train_samples_per_second': 3.838, 'train_steps_per_second': 0.505, 'train_loss': 0.6566094080607097, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 37989.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 104.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 213.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.7122890949249268, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.098, 'eval_samples_per_second': 10.205, 'eval_steps_per_second': 10.205, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:16,  1.63s/it]
 67%|██████▋   | 20/30 [00:37<00:16,  1.63s/it]

{'eval_loss': 0.6793912053108215, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.1273, 'eval_samples_per_second': 7.858, 'eval_steps_per_second': 7.858, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.64s/it]
100%|██████████| 30/30 [00:56<00:00,  1.64s/it]

{'eval_loss': 0.6518060564994812, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.1031, 'eval_samples_per_second': 9.696, 'eval_steps_per_second': 9.696, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.4983, 'train_samples_per_second': 3.965, 'train_steps_per_second': 0.522, 'train_loss': 0.6566094080607097, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25241.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 109.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 311.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream tas

{'eval_loss': 0.712274968624115, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:17,  1.70s/it]
 67%|██████▋   | 20/30 [00:37<00:17,  1.70s/it]

{'eval_loss': 0.6791061162948608, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.373, 'eval_steps_per_second': 13.373, 'epoch': 2.0}


100%|██████████| 30/30 [00:56<00:00,  1.71s/it]
100%|██████████| 30/30 [00:57<00:00,  1.71s/it]

{'eval_loss': 0.6517552137374878, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.95s/it]


{'train_runtime': 58.3681, 'train_samples_per_second': 3.958, 'train_steps_per_second': 0.514, 'train_loss': 0.656155014038086, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.11it/s]
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25652.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 878.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 102.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.712274968624115, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.516, 'eval_steps_per_second': 11.516, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:17,  1.76s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.76s/it]

{'eval_loss': 0.6791061162948608, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.011, 'eval_steps_per_second': 13.011, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.71s/it]
100%|██████████| 30/30 [00:57<00:00,  1.71s/it]

{'eval_loss': 0.6517552137374878, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.9308, 'train_samples_per_second': 3.92, 'train_steps_per_second': 0.509, 'train_loss': 0.656155014038086, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25666.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 107.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 317.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.712274968624115, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:17,  1.78s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.78s/it]

{'eval_loss': 0.6791061162948608, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.202, 'eval_steps_per_second': 13.202, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.78s/it]
100%|██████████| 30/30 [00:58<00:00,  1.78s/it]

{'eval_loss': 0.6517552137374878, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.289, 'eval_steps_per_second': 12.289, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.5374, 'train_samples_per_second': 3.88, 'train_steps_per_second': 0.504, 'train_loss': 0.656155014038086, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25668.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 109.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.712274968624115, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:17,  1.71s/it]
 67%|██████▋   | 20/30 [00:37<00:17,  1.71s/it]

{'eval_loss': 0.6791061162948608, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.839, 'eval_steps_per_second': 11.839, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.75s/it]
100%|██████████| 30/30 [00:57<00:00,  1.75s/it]

{'eval_loss': 0.6517552137374878, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.6828, 'train_samples_per_second': 3.936, 'train_steps_per_second': 0.511, 'train_loss': 0.656155014038086, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 25241.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 109.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.712274968624115, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.967, 'eval_steps_per_second': 11.967, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:17,  1.73s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.73s/it]

{'eval_loss': 0.6791061162948608, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.507, 'eval_steps_per_second': 11.507, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.77s/it]
100%|██████████| 30/30 [00:58<00:00,  1.77s/it]

{'eval_loss': 0.6517552137374878, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.183, 'eval_steps_per_second': 13.183, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.5887, 'train_samples_per_second': 3.877, 'train_steps_per_second': 0.503, 'train_loss': 0.656155014038086, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 19281.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 108.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7121273875236511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.028, 'eval_steps_per_second': 13.028, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:37<00:17,  1.76s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.76s/it]

{'eval_loss': 0.6782718896865845, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.868, 'eval_steps_per_second': 11.868, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.80s/it]
100%|██████████| 30/30 [00:57<00:00,  1.80s/it]

{'eval_loss': 0.6511890292167664, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3746, 'train_samples_per_second': 3.941, 'train_steps_per_second': 0.505, 'train_loss': 0.6559331893920899, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 90.32it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 39002.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.86 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 107.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7121273875236511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.81s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.81s/it]

{'eval_loss': 0.6782718896865845, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.784, 'eval_steps_per_second': 11.784, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.79s/it]
100%|██████████| 30/30 [00:58<00:00,  1.79s/it]

{'eval_loss': 0.6511890292167664, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.771, 'eval_steps_per_second': 11.771, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.7378, 'train_samples_per_second': 3.917, 'train_steps_per_second': 0.502, 'train_loss': 0.6559331893920899, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 19314.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 945.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.54 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 104.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 218.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7121273875236511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.996, 'eval_steps_per_second': 11.996, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:17,  1.75s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.75s/it]

{'eval_loss': 0.6782718896865845, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.74s/it]
100%|██████████| 30/30 [00:57<00:00,  1.74s/it]

{'eval_loss': 0.6511890292167664, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.7929, 'train_samples_per_second': 3.98, 'train_steps_per_second': 0.51, 'train_loss': 0.6559331893920899, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 25441.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 106.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.7121273875236511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.069, 'eval_steps_per_second': 12.069, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.81s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.81s/it]

{'eval_loss': 0.6782718896865845, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.691, 'eval_steps_per_second': 12.691, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.77s/it]
100%|██████████| 30/30 [00:57<00:00,  1.77s/it]

{'eval_loss': 0.6511890292167664, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3816, 'train_samples_per_second': 3.941, 'train_steps_per_second': 0.505, 'train_loss': 0.6559331893920899, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
100%|██████████| 1/1 [00:00<00:00, 141.73it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 19503.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 103.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7121273875236511, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.523, 'eval_steps_per_second': 12.523, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:17,  1.80s/it]
 67%|██████▋   | 20/30 [00:38<00:17,  1.80s/it]

{'eval_loss': 0.6782718896865845, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.694, 'eval_steps_per_second': 12.694, 'epoch': 2.0}


100%|██████████| 30/30 [00:57<00:00,  1.75s/it]
100%|██████████| 30/30 [00:57<00:00,  1.75s/it]

{'eval_loss': 0.6511890292167664, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3436, 'train_samples_per_second': 3.943, 'train_steps_per_second': 0.506, 'train_loss': 0.6559331893920899, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 19754.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 108.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7120887637138367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:39<00:18,  1.87s/it]
 67%|██████▋   | 20/30 [00:39<00:18,  1.87s/it]

{'eval_loss': 0.6816449761390686, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.291, 'eval_steps_per_second': 12.291, 'epoch': 2.0}


100%|██████████| 30/30 [00:59<00:00,  1.88s/it]
100%|██████████| 30/30 [00:59<00:00,  1.88s/it]

{'eval_loss': 0.6584764122962952, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.03s/it]


{'train_runtime': 60.9476, 'train_samples_per_second': 3.889, 'train_steps_per_second': 0.492, 'train_loss': 0.6567720413208008, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 65.92it/s]
100%|██████████| 1/1 [00:00<00:00, 165.31it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 26312.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.56 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 107.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7120887637138367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.85s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.85s/it]

{'eval_loss': 0.6816449761390686, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.784, 'eval_steps_per_second': 11.784, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.85s/it]
100%|██████████| 30/30 [00:59<00:00,  1.85s/it]

{'eval_loss': 0.6584764122962952, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.644, 'eval_steps_per_second': 11.644, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.3995, 'train_samples_per_second': 3.924, 'train_steps_per_second': 0.497, 'train_loss': 0.6567720413208008, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 76.47it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 39451.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 107.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.7120887637138367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.777, 'eval_steps_per_second': 11.777, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:39<00:18,  1.86s/it]
 67%|██████▋   | 20/30 [00:39<00:18,  1.86s/it]

{'eval_loss': 0.6816449761390686, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 2.0}


100%|██████████| 30/30 [00:59<00:00,  1.87s/it]
100%|██████████| 30/30 [00:59<00:00,  1.87s/it]

{'eval_loss': 0.6584764122962952, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.915, 'eval_steps_per_second': 11.915, 'epoch': 3.0}


100%|██████████| 30/30 [01:01<00:00,  2.04s/it]


{'train_runtime': 61.2556, 'train_samples_per_second': 3.869, 'train_steps_per_second': 0.49, 'train_loss': 0.6567720413208008, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 131.93it/s]
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 26343.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 888.44 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 107.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7120887637138367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.84s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.84s/it]

{'eval_loss': 0.6816449761390686, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.85s/it]
100%|██████████| 30/30 [00:58<00:00,  1.85s/it]

{'eval_loss': 0.6584764122962952, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.359, 'eval_steps_per_second': 11.359, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.2042, 'train_samples_per_second': 3.937, 'train_steps_per_second': 0.498, 'train_loss': 0.6567720413208008, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.12it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 19561.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 107.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7120887637138367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.355, 'eval_steps_per_second': 12.355, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.81s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.81s/it]

{'eval_loss': 0.6816449761390686, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.88s/it]
100%|██████████| 30/30 [00:58<00:00,  1.88s/it]

{'eval_loss': 0.6584764122962952, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.09, 'eval_steps_per_second': 13.09, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.00s/it]


{'train_runtime': 60.0177, 'train_samples_per_second': 3.949, 'train_steps_per_second': 0.5, 'train_loss': 0.6567720413208008, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.97it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 19993.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 108.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7124419212341309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.683, 'eval_steps_per_second': 11.683, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.86s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.86s/it]

{'eval_loss': 0.6817755103111267, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.772, 'eval_steps_per_second': 11.772, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.89s/it]
100%|██████████| 30/30 [00:58<00:00,  1.89s/it]

{'eval_loss': 0.6573780179023743, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.151, 'eval_steps_per_second': 12.151, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.3228, 'train_samples_per_second': 3.979, 'train_steps_per_second': 0.497, 'train_loss': 0.6567092895507812, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.93it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 39121.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 966.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 108.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7124419212341309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.754, 'eval_steps_per_second': 10.754, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.83s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.83s/it]

{'eval_loss': 0.6817755103111267, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.655, 'eval_steps_per_second': 11.655, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.89s/it]
100%|██████████| 30/30 [00:58<00:00,  1.89s/it]

{'eval_loss': 0.6573780179023743, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.724, 'eval_steps_per_second': 11.724, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.3534, 'train_samples_per_second': 3.977, 'train_steps_per_second': 0.497, 'train_loss': 0.6567092895507812, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 147.41it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 151.76it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 20003.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 102.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7124419212341309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0965, 'eval_samples_per_second': 10.361, 'eval_steps_per_second': 10.361, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:39<00:18,  1.90s/it]
 67%|██████▋   | 20/30 [00:39<00:18,  1.90s/it]

{'eval_loss': 0.6817755103111267, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.85s/it]
100%|██████████| 30/30 [00:58<00:00,  1.85s/it]

{'eval_loss': 0.6573780179023743, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.1735, 'train_samples_per_second': 3.988, 'train_steps_per_second': 0.499, 'train_loss': 0.6567092895507812, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.55it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.55it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 26668.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 104.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 482.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.7124419212341309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0785, 'eval_samples_per_second': 12.738, 'eval_steps_per_second': 12.738, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:39<00:18,  1.89s/it]
 67%|██████▋   | 20/30 [00:39<00:18,  1.89s/it]

{'eval_loss': 0.6817755103111267, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.866, 'eval_steps_per_second': 10.866, 'epoch': 2.0}


100%|██████████| 30/30 [00:59<00:00,  1.93s/it]
100%|██████████| 30/30 [00:59<00:00,  1.93s/it]

{'eval_loss': 0.6573780179023743, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.653, 'eval_steps_per_second': 11.653, 'epoch': 3.0}


100%|██████████| 30/30 [01:01<00:00,  2.04s/it]


{'train_runtime': 61.2529, 'train_samples_per_second': 3.918, 'train_steps_per_second': 0.49, 'train_loss': 0.6567092895507812, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 110.12it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 26356.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 108.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.7124419212341309, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 1.0}


 67%|██████▋   | 20/30 [00:38<00:18,  1.83s/it]
 67%|██████▋   | 20/30 [00:38<00:18,  1.83s/it]

{'eval_loss': 0.6817755103111267, 'eval_precision': 1.0, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.8333333333333333, 'eval_accuracy': 0.9761904761904762, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0744, 'eval_samples_per_second': 13.433, 'eval_steps_per_second': 13.433, 'epoch': 2.0}


100%|██████████| 30/30 [00:58<00:00,  1.85s/it]
100%|██████████| 30/30 [00:58<00:00,  1.85s/it]

{'eval_loss': 0.6573780179023743, 'eval_precision': 0.7142857142857143, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7142857142857143, 'eval_accuracy': 0.9603174603174603, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.544, 'eval_steps_per_second': 13.544, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.576, 'train_samples_per_second': 4.028, 'train_steps_per_second': 0.504, 'train_loss': 0.6567092895507812, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 122.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 16205.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 103.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.7172436118125916, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:16,  1.50s/it]
 67%|██████▋   | 22/33 [00:40<00:16,  1.50s/it]

{'eval_loss': 0.6663991808891296, 'eval_precision': 0.8333333333333334, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.7692307692307692, 'eval_accuracy': 0.9682539682539683, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.374, 'eval_steps_per_second': 11.374, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:00<00:00,  1.52s/it]

{'eval_loss': 0.6423327326774597, 'eval_precision': 0.625, 'eval_recall': 0.7142857142857143, 'eval_f1': 0.6666666666666666, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.8333333333333333, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.311, 'eval_steps_per_second': 11.311, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.88s/it]


{'train_runtime': 62.0843, 'train_samples_per_second': 3.914, 'train_steps_per_second': 0.532, 'train_loss': 0.6022069526441169, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.88it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 39573.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 99.76 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 329.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.578876256942749, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.53, 'eval_steps_per_second': 12.53, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:16,  1.50s/it]
 67%|██████▋   | 22/33 [00:40<00:16,  1.50s/it]

{'eval_loss': 0.4272578954696655, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.251, 'eval_steps_per_second': 11.251, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.53s/it]

{'eval_loss': 0.3874342143535614, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0905, 'eval_samples_per_second': 11.048, 'eval_steps_per_second': 11.048, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.90s/it]


{'train_runtime': 62.5488, 'train_samples_per_second': 3.885, 'train_steps_per_second': 0.528, 'train_loss': 0.555152719671076, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 71.02it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 71.06it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 19435.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 103.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to 

{'eval_loss': 0.578876256942749, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.826, 'eval_steps_per_second': 11.826, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:16,  1.53s/it]
 67%|██████▋   | 22/33 [00:40<00:16,  1.53s/it]

{'eval_loss': 0.4272578954696655, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.642, 'eval_steps_per_second': 11.642, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.55s/it]

{'eval_loss': 0.3874342143535614, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0797, 'eval_samples_per_second': 12.539, 'eval_steps_per_second': 12.539, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.92s/it]


{'train_runtime': 63.2566, 'train_samples_per_second': 3.841, 'train_steps_per_second': 0.522, 'train_loss': 0.555152719671076, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 140.52it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 26997.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.19 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 105.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.578876256942749, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:39<00:16,  1.49s/it]
 67%|██████▋   | 22/33 [00:39<00:16,  1.49s/it]

{'eval_loss': 0.4272578954696655, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.173, 'eval_steps_per_second': 13.173, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:00<00:00,  1.54s/it]

{'eval_loss': 0.3874342143535614, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.38, 'eval_steps_per_second': 12.38, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.88s/it]


{'train_runtime': 62.1513, 'train_samples_per_second': 3.91, 'train_steps_per_second': 0.531, 'train_loss': 0.555152719671076, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 20012.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 105.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.578876256942749, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.366, 'eval_steps_per_second': 13.366, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:39<00:16,  1.52s/it]
 67%|██████▋   | 22/33 [00:39<00:16,  1.52s/it]

{'eval_loss': 0.4272578954696655, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.55s/it]

{'eval_loss': 0.3874342143535614, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.08, 'eval_steps_per_second': 12.08, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.89s/it]


{'train_runtime': 62.3129, 'train_samples_per_second': 3.9, 'train_steps_per_second': 0.53, 'train_loss': 0.555152719671076, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 20204.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 102.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5792169570922852, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.194, 'eval_steps_per_second': 13.194, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.58s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.58s/it]

{'eval_loss': 0.4261963665485382, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.59s/it]

{'eval_loss': 0.38703858852386475, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.249, 'eval_steps_per_second': 11.249, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.5694, 'train_samples_per_second': 3.87, 'train_steps_per_second': 0.519, 'train_loss': 0.5535608927408854, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 26926.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 99.79 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 247.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5792169570922852, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.61s/it]
 67%|██████▋   | 22/33 [00:41<00:17,  1.61s/it]

{'eval_loss': 0.4261963665485382, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.63s/it]

{'eval_loss': 0.38703858852386475, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.198, 'eval_steps_per_second': 13.198, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.94s/it]


{'train_runtime': 64.0535, 'train_samples_per_second': 3.841, 'train_steps_per_second': 0.515, 'train_loss': 0.5535608927408854, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 20501.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 103.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5792169570922852, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.389, 'eval_steps_per_second': 11.389, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.59s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.59s/it]

{'eval_loss': 0.4261963665485382, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.765, 'eval_steps_per_second': 11.765, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.54s/it]

{'eval_loss': 0.38703858852386475, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.91s/it]


{'train_runtime': 62.9253, 'train_samples_per_second': 3.909, 'train_steps_per_second': 0.524, 'train_loss': 0.5535608927408854, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 20500.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.89 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 103.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5792169570922852, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:17,  1.62s/it]
 67%|██████▋   | 22/33 [00:41<00:17,  1.62s/it]

{'eval_loss': 0.4261963665485382, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.56s/it]

{'eval_loss': 0.38703858852386475, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.628, 'eval_steps_per_second': 11.628, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.92s/it]


{'train_runtime': 63.2934, 'train_samples_per_second': 3.887, 'train_steps_per_second': 0.521, 'train_loss': 0.5535608927408854, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 26932.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 101.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5792169570922852, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.55s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.55s/it]

{'eval_loss': 0.4261963665485382, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:00<00:00,  1.54s/it]

{'eval_loss': 0.38703858852386475, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.645, 'eval_steps_per_second': 11.645, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.89s/it]


{'train_runtime': 62.2318, 'train_samples_per_second': 3.953, 'train_steps_per_second': 0.53, 'train_loss': 0.5535608927408854, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 140.79it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20755.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 101.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5801814198493958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.61s/it]
 67%|██████▋   | 22/33 [00:41<00:17,  1.61s/it]

{'eval_loss': 0.41802921891212463, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.232, 'eval_steps_per_second': 11.232, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.61s/it]

{'eval_loss': 0.37966933846473694, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.424, 'eval_steps_per_second': 12.424, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.91s/it]


{'train_runtime': 63.1605, 'train_samples_per_second': 3.942, 'train_steps_per_second': 0.522, 'train_loss': 0.55296718713009, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 58.22it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 16601.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 102.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5801814198493958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.371, 'eval_steps_per_second': 11.371, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.59s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.59s/it]

{'eval_loss': 0.41802921891212463, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.61, 'eval_steps_per_second': 11.61, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.64s/it]

{'eval_loss': 0.37966933846473694, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.121, 'eval_steps_per_second': 12.121, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.94s/it]


{'train_runtime': 64.0376, 'train_samples_per_second': 3.888, 'train_steps_per_second': 0.515, 'train_loss': 0.55296718713009, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20746.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 99.64 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5801814198493958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.419, 'eval_steps_per_second': 11.419, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.58s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.58s/it]

{'eval_loss': 0.41802921891212463, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.127, 'eval_steps_per_second': 11.127, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.65s/it]

{'eval_loss': 0.37966933846473694, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.076, 'eval_steps_per_second': 12.076, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.5561, 'train_samples_per_second': 3.918, 'train_steps_per_second': 0.519, 'train_loss': 0.55296718713009, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 123.90it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.45it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20482.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 94.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5801814198493958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.503, 'eval_steps_per_second': 11.503, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:17,  1.63s/it]
 67%|██████▋   | 22/33 [00:41<00:17,  1.63s/it]

{'eval_loss': 0.41802921891212463, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.828, 'eval_steps_per_second': 11.828, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.63s/it]

{'eval_loss': 0.37966933846473694, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.989, 'eval_steps_per_second': 11.989, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.7716, 'train_samples_per_second': 3.905, 'train_steps_per_second': 0.517, 'train_loss': 0.55296718713009, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20488.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 99.30 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5801814198493958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.08, 'eval_samples_per_second': 12.505, 'eval_steps_per_second': 12.505, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:18,  1.66s/it]
 67%|██████▋   | 22/33 [00:41<00:18,  1.66s/it]

{'eval_loss': 0.41802921891212463, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.623, 'eval_steps_per_second': 11.623, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.62s/it]

{'eval_loss': 0.37966933846473694, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.498, 'eval_steps_per_second': 11.498, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.94s/it]


{'train_runtime': 64.1603, 'train_samples_per_second': 3.881, 'train_steps_per_second': 0.514, 'train_loss': 0.55296718713009, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 27548.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 99.18 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5804488658905029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.094, 'eval_samples_per_second': 10.642, 'eval_steps_per_second': 10.642, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:18,  1.69s/it]
 67%|██████▋   | 22/33 [00:41<00:18,  1.69s/it]

{'eval_loss': 0.4287866950035095, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.1264, 'eval_samples_per_second': 7.908, 'eval_steps_per_second': 7.908, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.61s/it]

{'eval_loss': 0.38744476437568665, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1283, 'eval_samples_per_second': 7.795, 'eval_steps_per_second': 7.795, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.824, 'train_samples_per_second': 3.948, 'train_steps_per_second': 0.517, 'train_loss': 0.5538953145345052, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 162.37it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 21004.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 966.43 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 99.87 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5804488658905029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1306, 'eval_samples_per_second': 7.656, 'eval_steps_per_second': 7.656, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:18,  1.64s/it]
 67%|██████▋   | 22/33 [00:41<00:18,  1.64s/it]

{'eval_loss': 0.4287866950035095, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.1465, 'eval_samples_per_second': 6.825, 'eval_steps_per_second': 6.825, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.63s/it]

{'eval_loss': 0.38744476437568665, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1308, 'eval_samples_per_second': 7.643, 'eval_steps_per_second': 7.643, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.92s/it]


{'train_runtime': 63.3646, 'train_samples_per_second': 3.977, 'train_steps_per_second': 0.521, 'train_loss': 0.5538953145345052, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 28017.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 98.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5804488658905029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1121, 'eval_samples_per_second': 8.918, 'eval_steps_per_second': 8.918, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.62s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.62s/it]

{'eval_loss': 0.4287866950035095, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.1095, 'eval_samples_per_second': 9.134, 'eval_steps_per_second': 9.134, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.62s/it]

{'eval_loss': 0.38744476437568665, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1309, 'eval_samples_per_second': 7.641, 'eval_steps_per_second': 7.641, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.91s/it]


{'train_runtime': 62.8895, 'train_samples_per_second': 4.007, 'train_steps_per_second': 0.525, 'train_loss': 0.5538953145345052, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 20638.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 99.39 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.5804488658905029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1243, 'eval_samples_per_second': 8.046, 'eval_steps_per_second': 8.046, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.61s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.61s/it]

{'eval_loss': 0.4287866950035095, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.1223, 'eval_samples_per_second': 8.178, 'eval_steps_per_second': 8.178, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.68s/it]

{'eval_loss': 0.38744476437568665, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1372, 'eval_samples_per_second': 7.29, 'eval_steps_per_second': 7.29, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.92s/it]


{'train_runtime': 63.4607, 'train_samples_per_second': 3.971, 'train_steps_per_second': 0.52, 'train_loss': 0.5538953145345052, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 139.81it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 20758.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 100.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.5804488658905029, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0971, 'eval_samples_per_second': 10.302, 'eval_steps_per_second': 10.302, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:17,  1.61s/it]
 67%|██████▋   | 22/33 [00:40<00:17,  1.61s/it]

{'eval_loss': 0.4287866950035095, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.1133, 'eval_samples_per_second': 8.824, 'eval_steps_per_second': 8.824, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.66s/it]

{'eval_loss': 0.38744476437568665, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1146, 'eval_samples_per_second': 8.725, 'eval_steps_per_second': 8.725, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.92s/it]


{'train_runtime': 63.2091, 'train_samples_per_second': 3.987, 'train_steps_per_second': 0.522, 'train_loss': 0.5538953145345052, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.39it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 21061.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 96.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5799807906150818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:40<00:18,  1.68s/it]
 67%|██████▋   | 22/33 [00:40<00:18,  1.68s/it]

{'eval_loss': 0.427337110042572, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.979, 'eval_steps_per_second': 11.979, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.73s/it]

{'eval_loss': 0.3867778480052948, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.6699, 'train_samples_per_second': 4.005, 'train_steps_per_second': 0.518, 'train_loss': 0.5543742902351149, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 28369.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 96.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5799807906150818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:19,  1.74s/it]
 67%|██████▋   | 22/33 [00:41<00:19,  1.74s/it]

{'eval_loss': 0.427337110042572, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.68s/it]

{'eval_loss': 0.3867778480052948, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.353, 'eval_steps_per_second': 12.353, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.1853, 'train_samples_per_second': 3.973, 'train_steps_per_second': 0.514, 'train_loss': 0.5543742902351149, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 27811.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 96.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5799807906150818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.65, 'eval_steps_per_second': 11.65, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:19,  1.74s/it]
 67%|██████▋   | 22/33 [00:41<00:19,  1.74s/it]

{'eval_loss': 0.427337110042572, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.704, 'eval_steps_per_second': 11.704, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.70s/it]

{'eval_loss': 0.3867778480052948, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.2972, 'train_samples_per_second': 3.966, 'train_steps_per_second': 0.513, 'train_loss': 0.5543742902351149, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.31it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.29it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 27926.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 97.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5799807906150818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.859, 'eval_steps_per_second': 11.859, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:18,  1.68s/it]
 67%|██████▋   | 22/33 [00:41<00:18,  1.68s/it]

{'eval_loss': 0.427337110042572, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.944, 'eval_steps_per_second': 11.944, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.70s/it]

{'eval_loss': 0.3867778480052948, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.006, 'eval_steps_per_second': 13.006, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.93s/it]


{'train_runtime': 63.6734, 'train_samples_per_second': 4.005, 'train_steps_per_second': 0.518, 'train_loss': 0.5543742902351149, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 20991.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 96.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.5799807906150818, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.631, 'eval_steps_per_second': 11.631, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:18,  1.69s/it]
 67%|██████▋   | 22/33 [00:41<00:18,  1.69s/it]

{'eval_loss': 0.427337110042572, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:02<00:00,  1.74s/it]

{'eval_loss': 0.3867778480052948, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.794, 'eval_steps_per_second': 11.794, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.95s/it]


{'train_runtime': 64.3551, 'train_samples_per_second': 3.962, 'train_steps_per_second': 0.513, 'train_loss': 0.5543742902351149, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 21496.43 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1003.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 96.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.579541027545929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.984, 'eval_steps_per_second': 11.984, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:19,  1.81s/it]
 67%|██████▋   | 22/33 [00:42<00:19,  1.81s/it]

{'eval_loss': 0.41399917006492615, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.81s/it]

{'eval_loss': 0.3771790862083435, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.778, 'eval_steps_per_second': 11.778, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9798, 'train_samples_per_second': 3.91, 'train_steps_per_second': 0.5, 'train_loss': 0.5512459494850852, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 21311.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 932.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 96.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.579541027545929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.082, 'eval_steps_per_second': 13.082, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:19,  1.82s/it]
 67%|██████▋   | 22/33 [00:42<00:19,  1.82s/it]

{'eval_loss': 0.41399917006492615, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.851, 'eval_steps_per_second': 12.851, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.75s/it]

{'eval_loss': 0.3771790862083435, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.851, 'eval_steps_per_second': 11.851, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.97s/it]


{'train_runtime': 65.0475, 'train_samples_per_second': 3.966, 'train_steps_per_second': 0.507, 'train_loss': 0.5512459494850852, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.34it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 21500.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 96.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.579541027545929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.18, 'eval_steps_per_second': 12.18, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:19,  1.80s/it]
 67%|██████▋   | 22/33 [00:42<00:19,  1.80s/it]

{'eval_loss': 0.41399917006492615, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.73s/it]

{'eval_loss': 0.3771790862083435, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.96s/it]


{'train_runtime': 64.7286, 'train_samples_per_second': 3.986, 'train_steps_per_second': 0.51, 'train_loss': 0.5512459494850852, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 21267.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 97.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.579541027545929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:19,  1.75s/it]
 67%|██████▋   | 22/33 [00:41<00:19,  1.75s/it]

{'eval_loss': 0.41399917006492615, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.836, 'eval_steps_per_second': 11.836, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.80s/it]

{'eval_loss': 0.3771790862083435, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.946, 'eval_steps_per_second': 11.946, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.97s/it]


{'train_runtime': 64.871, 'train_samples_per_second': 3.977, 'train_steps_per_second': 0.509, 'train_loss': 0.5512459494850852, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.45it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 21505.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 97.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 280.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.579541027545929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.092, 'eval_steps_per_second': 13.092, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:41<00:19,  1.76s/it]
 67%|██████▋   | 22/33 [00:41<00:19,  1.76s/it]

{'eval_loss': 0.41399917006492615, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.203, 'eval_steps_per_second': 12.203, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.81s/it]

{'eval_loss': 0.3771790862083435, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.196, 'eval_steps_per_second': 13.196, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.2691, 'train_samples_per_second': 3.953, 'train_steps_per_second': 0.506, 'train_loss': 0.5512459494850852, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 17226.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 96.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5792056918144226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]

{'eval_loss': 0.4249471426010132, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.194, 'eval_steps_per_second': 13.194, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.80s/it]

{'eval_loss': 0.385829359292984, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.182, 'eval_steps_per_second': 13.182, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.3068, 'train_samples_per_second': 3.997, 'train_steps_per_second': 0.505, 'train_loss': 0.5521638465650154, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 21758.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 96.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5792056918144226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.86s/it]
 67%|██████▋   | 22/33 [00:43<00:20,  1.86s/it]

{'eval_loss': 0.4249471426010132, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.922, 'eval_steps_per_second': 11.922, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.82s/it]

{'eval_loss': 0.385829359292984, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9324, 'train_samples_per_second': 3.959, 'train_steps_per_second': 0.501, 'train_loss': 0.5521638465650154, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 21577.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.20 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 96.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5792056918144226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:19,  1.79s/it]
 67%|██████▋   | 22/33 [00:42<00:19,  1.79s/it]

{'eval_loss': 0.4249471426010132, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.84s/it]

{'eval_loss': 0.385829359292984, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.4927, 'train_samples_per_second': 3.985, 'train_steps_per_second': 0.504, 'train_loss': 0.5521638465650154, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 152.19it/s]
100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 28997.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.28 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 96.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5792056918144226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0754, 'eval_samples_per_second': 13.263, 'eval_steps_per_second': 13.263, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]

{'eval_loss': 0.4249471426010132, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.711, 'eval_steps_per_second': 11.711, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.86s/it]

{'eval_loss': 0.385829359292984, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.855, 'eval_steps_per_second': 12.855, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9807, 'train_samples_per_second': 3.956, 'train_steps_per_second': 0.5, 'train_loss': 0.5521638465650154, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 28588.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 90.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5792056918144226, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.254, 'eval_steps_per_second': 12.254, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.85s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.85s/it]

{'eval_loss': 0.4249471426010132, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.188, 'eval_steps_per_second': 12.188, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.79s/it]

{'eval_loss': 0.385829359292984, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.923, 'eval_steps_per_second': 12.923, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.2441, 'train_samples_per_second': 4.0, 'train_steps_per_second': 0.506, 'train_loss': 0.5521638465650154, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 28905.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 96.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5791907906532288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.85s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.85s/it]

{'eval_loss': 0.4198576807975769, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.91s/it]

{'eval_loss': 0.3840040862560272, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.2013, 'train_samples_per_second': 3.988, 'train_steps_per_second': 0.498, 'train_loss': 0.5502479437625769, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.14it/s]
100%|██████████| 1/1 [00:00<00:00, 164.35it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 22005.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 96.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 483.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5791907906532288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.83s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.83s/it]

{'eval_loss': 0.4198576807975769, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.094, 'eval_samples_per_second': 10.636, 'eval_steps_per_second': 10.636, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.89s/it]

{'eval_loss': 0.3840040862560272, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.8868, 'train_samples_per_second': 4.007, 'train_steps_per_second': 0.501, 'train_loss': 0.5502479437625769, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 29349.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 97.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5791907906532288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.142, 'eval_steps_per_second': 12.142, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:43<00:21,  1.91s/it]
 67%|██████▋   | 22/33 [00:43<00:21,  1.91s/it]

{'eval_loss': 0.4198576807975769, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.84s/it]

{'eval_loss': 0.3840040862560272, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.373, 'eval_steps_per_second': 13.373, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.2144, 'train_samples_per_second': 3.987, 'train_steps_per_second': 0.498, 'train_loss': 0.5502479437625769, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 19515.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.12 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 96.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5791907906532288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.199, 'eval_steps_per_second': 12.199, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]

{'eval_loss': 0.4198576807975769, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.55, 'eval_steps_per_second': 13.55, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.89s/it]

{'eval_loss': 0.3840040862560272, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.8666, 'train_samples_per_second': 4.008, 'train_steps_per_second': 0.501, 'train_loss': 0.5502479437625769, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 21712.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 96.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5791907906532288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 1.0}


 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]
 67%|██████▋   | 22/33 [00:42<00:20,  1.84s/it]

{'eval_loss': 0.4198576807975769, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.511, 'eval_steps_per_second': 11.511, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.90s/it]

{'eval_loss': 0.3840040862560272, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.00s/it]


{'train_runtime': 66.0143, 'train_samples_per_second': 3.999, 'train_steps_per_second': 0.5, 'train_loss': 0.5502479437625769, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 21946.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 95.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5690913200378418, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:43<00:17,  1.49s/it]

{'eval_loss': 0.386683851480484, 'eval_precision': 0.5714285714285714, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5714285714285714, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.438, 'eval_steps_per_second': 11.438, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:05<00:00,  1.49s/it]

{'eval_loss': 0.3660220801830292, 'eval_precision': 0.5, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5333333333333333, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.308, 'eval_steps_per_second': 11.308, 'epoch': 3.0}


100%|██████████| 36/36 [01:07<00:00,  1.86s/it]


{'train_runtime': 67.1358, 'train_samples_per_second': 3.977, 'train_steps_per_second': 0.536, 'train_loss': 0.5059441990322537, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.31it/s]
100%|██████████| 1/1 [00:00<00:00, 58.33it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 22008.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 938.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 95.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.6623466610908508, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.009, 'eval_steps_per_second': 13.009, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.53s/it]

{'eval_loss': 0.4584636688232422, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.118, 'eval_steps_per_second': 11.118, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.59s/it]

{'eval_loss': 0.4397616982460022, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0893, 'eval_samples_per_second': 11.203, 'eval_steps_per_second': 11.203, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.1022, 'train_samples_per_second': 3.864, 'train_steps_per_second': 0.521, 'train_loss': 0.48862679799397785, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.20it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 26.02it/s]
100%|██████████| 1/1 [00:00<00:00, 165.28it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 29666.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 94.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6623466610908508, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.119, 'eval_steps_per_second': 12.119, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:43<00:18,  1.54s/it]

{'eval_loss': 0.4584636688232422, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.31, 'eval_steps_per_second': 11.31, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:06<00:00,  1.50s/it]

{'eval_loss': 0.4397616982460022, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 36/36 [01:07<00:00,  1.88s/it]


{'train_runtime': 67.6802, 'train_samples_per_second': 3.945, 'train_steps_per_second': 0.532, 'train_loss': 0.48862679799397785, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.35it/s]
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 22017.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 943.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 94.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6623466610908508, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.53s/it]

{'eval_loss': 0.4584636688232422, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.944, 'eval_steps_per_second': 12.944, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.55s/it]

{'eval_loss': 0.4397616982460022, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.7517, 'train_samples_per_second': 3.884, 'train_steps_per_second': 0.524, 'train_loss': 0.48862679799397785, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 22023.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 96.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6623466610908508, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.131, 'eval_steps_per_second': 12.131, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.54s/it]

{'eval_loss': 0.4584636688232422, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.633, 'eval_steps_per_second': 11.633, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:06<00:00,  1.50s/it]

{'eval_loss': 0.4397616982460022, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.89s/it]


{'train_runtime': 68.1641, 'train_samples_per_second': 3.917, 'train_steps_per_second': 0.528, 'train_loss': 0.48862679799397785, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 130.53it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 22153.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 940.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 96.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6611868739128113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0923, 'eval_samples_per_second': 10.83, 'eval_steps_per_second': 10.83, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.57s/it]

{'eval_loss': 0.4583664536476135, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.56s/it]

{'eval_loss': 0.43991413712501526, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.3884, 'train_samples_per_second': 3.948, 'train_steps_per_second': 0.526, 'train_loss': 0.4864115185207791, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 29983.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 96.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6611868739128113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.54s/it]

{'eval_loss': 0.4583664536476135, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.093, 'eval_steps_per_second': 12.093, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.61s/it]

{'eval_loss': 0.43991413712501526, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.511, 'eval_steps_per_second': 11.511, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.7463, 'train_samples_per_second': 3.927, 'train_steps_per_second': 0.524, 'train_loss': 0.4864115185207791, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 22252.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.88 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 94.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6611868739128113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.925, 'eval_steps_per_second': 12.925, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.59s/it]

{'eval_loss': 0.4583664536476135, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.625, 'eval_steps_per_second': 11.625, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:06<00:00,  1.54s/it]

{'eval_loss': 0.43991413712501526, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.637, 'eval_steps_per_second': 11.637, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.3188, 'train_samples_per_second': 3.952, 'train_steps_per_second': 0.527, 'train_loss': 0.4864115185207791, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 29999.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 95.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 431.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6611868739128113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.854, 'eval_steps_per_second': 12.854, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.54s/it]

{'eval_loss': 0.4583664536476135, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.44, 'eval_steps_per_second': 12.44, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.59s/it]

{'eval_loss': 0.43991413712501526, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.698, 'eval_steps_per_second': 12.698, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.3519, 'train_samples_per_second': 3.95, 'train_steps_per_second': 0.527, 'train_loss': 0.4864115185207791, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 36.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 30011.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 96.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6611868739128113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.645, 'eval_steps_per_second': 11.645, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.61s/it]

{'eval_loss': 0.4583664536476135, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.517, 'eval_steps_per_second': 12.517, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.59s/it]

{'eval_loss': 0.43991413712501526, 'eval_precision': 0.125, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.13333333333333333, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.2, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.8394, 'train_samples_per_second': 3.922, 'train_steps_per_second': 0.523, 'train_loss': 0.4864115185207791, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 22750.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 96.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6605718731880188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:43<00:18,  1.57s/it]

{'eval_loss': 0.45598143339157104, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.023, 'eval_steps_per_second': 13.023, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.63s/it]

{'eval_loss': 0.43762701749801636, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.112, 'eval_steps_per_second': 11.112, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.5558, 'train_samples_per_second': 3.982, 'train_steps_per_second': 0.525, 'train_loss': 0.4858686129252116, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 17998.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 949.58 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 95.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6605718731880188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.563, 'eval_steps_per_second': 11.563, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.63s/it]

{'eval_loss': 0.45598143339157104, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.266, 'eval_steps_per_second': 11.266, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.63s/it]

{'eval_loss': 0.43762701749801636, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.546, 'eval_steps_per_second': 11.546, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.598, 'train_samples_per_second': 3.98, 'train_steps_per_second': 0.525, 'train_loss': 0.4858686129252116, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 165.29it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 18045.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 96.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6605718731880188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.675, 'eval_steps_per_second': 12.675, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.59s/it]

{'eval_loss': 0.45598143339157104, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.493, 'eval_steps_per_second': 11.493, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.62s/it]

{'eval_loss': 0.43762701749801636, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0833, 'eval_samples_per_second': 11.998, 'eval_steps_per_second': 11.998, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.6709, 'train_samples_per_second': 3.975, 'train_steps_per_second': 0.524, 'train_loss': 0.4858686129252116, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 30335.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 94.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6605718731880188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0872, 'eval_samples_per_second': 11.469, 'eval_steps_per_second': 11.469, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.62s/it]

{'eval_loss': 0.45598143339157104, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0905, 'eval_samples_per_second': 11.044, 'eval_steps_per_second': 11.044, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.60s/it]

{'eval_loss': 0.43762701749801636, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.0152, 'train_samples_per_second': 3.956, 'train_steps_per_second': 0.522, 'train_loss': 0.4858686129252116, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.88it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 30347.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 6241.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.23 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 92.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6605718731880188, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.51, 'eval_steps_per_second': 12.51, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:43<00:18,  1.58s/it]

{'eval_loss': 0.45598143339157104, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.764, 'eval_steps_per_second': 11.764, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:06<00:00,  1.63s/it]

{'eval_loss': 0.43762701749801636, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.9, 'eval_steps_per_second': 11.9, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.89s/it]


{'train_runtime': 68.0024, 'train_samples_per_second': 4.015, 'train_steps_per_second': 0.529, 'train_loss': 0.4858686129252116, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.73it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 22606.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 94.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6603285670280457, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1042, 'eval_samples_per_second': 9.596, 'eval_steps_per_second': 9.596, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.66s/it]

{'eval_loss': 0.45595794916152954, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.1051, 'eval_samples_per_second': 9.516, 'eval_steps_per_second': 9.516, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.62s/it]

{'eval_loss': 0.4383169412612915, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.1313, 'eval_samples_per_second': 7.614, 'eval_steps_per_second': 7.614, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.6669, 'train_samples_per_second': 4.019, 'train_steps_per_second': 0.524, 'train_loss': 0.48556221856011283, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 22666.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 949.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 94.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.6603285670280457, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1106, 'eval_samples_per_second': 9.043, 'eval_steps_per_second': 9.043, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.61s/it]

{'eval_loss': 0.45595794916152954, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.1144, 'eval_samples_per_second': 8.745, 'eval_steps_per_second': 8.745, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.68s/it]

{'eval_loss': 0.4383169412612915, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.1224, 'eval_samples_per_second': 8.169, 'eval_steps_per_second': 8.169, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.92s/it]


{'train_runtime': 68.9762, 'train_samples_per_second': 4.001, 'train_steps_per_second': 0.522, 'train_loss': 0.48556221856011283, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 22996.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 94.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6603285670280457, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1315, 'eval_samples_per_second': 7.602, 'eval_steps_per_second': 7.602, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.66s/it]

{'eval_loss': 0.45595794916152954, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.1331, 'eval_samples_per_second': 7.512, 'eval_steps_per_second': 7.512, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.60s/it]

{'eval_loss': 0.4383169412612915, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.149, 'eval_samples_per_second': 6.713, 'eval_steps_per_second': 6.713, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.90s/it]


{'train_runtime': 68.5635, 'train_samples_per_second': 4.025, 'train_steps_per_second': 0.525, 'train_loss': 0.48556221856011283, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.22it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 30676.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 675.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 93.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 238.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6603285670280457, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1112, 'eval_samples_per_second': 8.993, 'eval_steps_per_second': 8.993, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.63s/it]

{'eval_loss': 0.45595794916152954, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.1198, 'eval_samples_per_second': 8.348, 'eval_steps_per_second': 8.348, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.66s/it]

{'eval_loss': 0.4383169412612915, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.1112, 'eval_samples_per_second': 8.992, 'eval_steps_per_second': 8.992, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.7461, 'train_samples_per_second': 4.015, 'train_steps_per_second': 0.524, 'train_loss': 0.48556221856011283, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 162.81it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 30666.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 945.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 94.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6603285670280457, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1222, 'eval_samples_per_second': 8.182, 'eval_steps_per_second': 8.182, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.68s/it]

{'eval_loss': 0.45595794916152954, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.1333, 'eval_samples_per_second': 7.502, 'eval_steps_per_second': 7.502, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.61s/it]

{'eval_loss': 0.4383169412612915, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.1289, 'eval_samples_per_second': 7.76, 'eval_steps_per_second': 7.76, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.0653, 'train_samples_per_second': 3.996, 'train_steps_per_second': 0.521, 'train_loss': 0.48556221856011283, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 18603.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.07 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 95.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6596022844314575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0744, 'eval_samples_per_second': 13.435, 'eval_steps_per_second': 13.435, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:20,  1.70s/it]

{'eval_loss': 0.4549624025821686, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.74s/it]

{'eval_loss': 0.437471866607666, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.5357, 'train_samples_per_second': 4.012, 'train_steps_per_second': 0.518, 'train_loss': 0.4854365984598796, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 22886.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 95.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6596022844314575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.73s/it]

{'eval_loss': 0.4549624025821686, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.68s/it]

{'eval_loss': 0.437471866607666, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.757, 'eval_steps_per_second': 10.757, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8157, 'train_samples_per_second': 3.996, 'train_steps_per_second': 0.516, 'train_loss': 0.4854365984598796, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 23254.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.29 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 93.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6596022844314575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.69s/it]

{'eval_loss': 0.4549624025821686, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.73s/it]

{'eval_loss': 0.437471866607666, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.102, 'train_samples_per_second': 3.98, 'train_steps_per_second': 0.514, 'train_loss': 0.4854365984598796, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 22991.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 988.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.09 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 95.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6596022844314575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.013, 'eval_steps_per_second': 13.013, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.75s/it]

{'eval_loss': 0.4549624025821686, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.69s/it]

{'eval_loss': 0.437471866607666, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.9006, 'train_samples_per_second': 3.991, 'train_steps_per_second': 0.515, 'train_loss': 0.4854365984598796, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 23250.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 94.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6596022844314575, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:20,  1.68s/it]

{'eval_loss': 0.4549624025821686, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.862, 'eval_steps_per_second': 12.862, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.75s/it]

{'eval_loss': 0.437471866607666, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.077, 'eval_samples_per_second': 12.992, 'eval_steps_per_second': 12.992, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.0202, 'train_samples_per_second': 3.985, 'train_steps_per_second': 0.514, 'train_loss': 0.4854365984598796, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 23197.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 92.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6603614091873169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.99, 'eval_steps_per_second': 11.99, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.81s/it]

{'eval_loss': 0.45561206340789795, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.077, 'eval_samples_per_second': 12.994, 'eval_steps_per_second': 12.994, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.80s/it]

{'eval_loss': 0.43814438581466675, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.97s/it]


{'train_runtime': 70.7872, 'train_samples_per_second': 3.984, 'train_steps_per_second': 0.509, 'train_loss': 0.4854653676350911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 30946.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 91.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6603614091873169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.79s/it]

{'eval_loss': 0.45561206340789795, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.084, 'eval_steps_per_second': 12.084, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.74s/it]

{'eval_loss': 0.43814438581466675, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.67, 'eval_steps_per_second': 12.67, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8796, 'train_samples_per_second': 4.036, 'train_steps_per_second': 0.515, 'train_loss': 0.4854653676350911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.04it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 23197.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 93.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6603614091873169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.75s/it]

{'eval_loss': 0.45561206340789795, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.249, 'eval_steps_per_second': 11.249, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.79s/it]

{'eval_loss': 0.43814438581466675, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.519, 'eval_steps_per_second': 11.519, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.1714, 'train_samples_per_second': 4.019, 'train_steps_per_second': 0.513, 'train_loss': 0.4854653676350911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 18627.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 946.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 89.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6603614091873169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.251, 'eval_steps_per_second': 12.251, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.80s/it]

{'eval_loss': 0.45561206340789795, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.73s/it]

{'eval_loss': 0.43814438581466675, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.376, 'eval_steps_per_second': 12.376, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8218, 'train_samples_per_second': 4.039, 'train_steps_per_second': 0.516, 'train_loss': 0.4854653676350911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 23177.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 94.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6603614091873169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.102, 'eval_steps_per_second': 13.102, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.73s/it]

{'eval_loss': 0.45561206340789795, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.36, 'eval_steps_per_second': 12.36, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.79s/it]

{'eval_loss': 0.43814438581466675, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.069, 'eval_steps_per_second': 12.069, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.0614, 'train_samples_per_second': 4.025, 'train_steps_per_second': 0.514, 'train_loss': 0.4854653676350911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.75it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 18786.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 486.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 87.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 242.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6597455739974976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.272, 'eval_steps_per_second': 13.272, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.82s/it]

{'eval_loss': 0.45542505383491516, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.77s/it]

{'eval_loss': 0.43778714537620544, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.298, 'eval_steps_per_second': 12.298, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.5498, 'train_samples_per_second': 4.04, 'train_steps_per_second': 0.51, 'train_loss': 0.48651578691270614, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.17it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 23480.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 91.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6597455739974976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.346, 'eval_steps_per_second': 13.346, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.85s/it]

{'eval_loss': 0.45542505383491516, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.77s/it]

{'eval_loss': 0.43778714537620544, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.922, 'eval_steps_per_second': 12.922, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.6016, 'train_samples_per_second': 4.037, 'train_steps_per_second': 0.51, 'train_loss': 0.48651578691270614, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 132.15it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 31626.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 92.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6597455739974976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.541, 'eval_steps_per_second': 13.541, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:21,  1.76s/it]

{'eval_loss': 0.45542505383491516, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.367, 'eval_steps_per_second': 13.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.84s/it]

{'eval_loss': 0.43778714537620544, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0769, 'eval_samples_per_second': 12.997, 'eval_steps_per_second': 12.997, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8438, 'train_samples_per_second': 4.081, 'train_steps_per_second': 0.515, 'train_loss': 0.48651578691270614, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.61it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 23433.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1003.18 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 87.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6597455739974976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.183, 'eval_steps_per_second': 13.183, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.83s/it]

{'eval_loss': 0.45542505383491516, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.76s/it]

{'eval_loss': 0.43778714537620544, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.85, 'eval_steps_per_second': 12.85, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.4793, 'train_samples_per_second': 4.044, 'train_steps_per_second': 0.511, 'train_loss': 0.48651578691270614, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 18988.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 91.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6597455739974976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:21,  1.76s/it]

{'eval_loss': 0.45542505383491516, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.83s/it]

{'eval_loss': 0.43778714537620544, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.859, 'eval_steps_per_second': 12.859, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.7899, 'train_samples_per_second': 4.084, 'train_steps_per_second': 0.516, 'train_loss': 0.48651578691270614, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 23734.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 939.16 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 92.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6592231392860413, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.86s/it]

{'eval_loss': 0.4551176428794861, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.36, 'eval_steps_per_second': 12.36, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.79s/it]

{'eval_loss': 0.43754643201828003, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.784, 'eval_steps_per_second': 11.784, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.3449, 'train_samples_per_second': 4.094, 'train_steps_per_second': 0.512, 'train_loss': 0.48619821336534286, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 23659.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 91.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6592231392860413, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.80s/it]

{'eval_loss': 0.4551176428794861, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.91s/it]

{'eval_loss': 0.43754643201828003, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.97s/it]


{'train_runtime': 71.0576, 'train_samples_per_second': 4.053, 'train_steps_per_second': 0.507, 'train_loss': 0.48619821336534286, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.28it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 31997.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 93.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6592231392860413, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.91, 'eval_steps_per_second': 11.91, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.81s/it]

{'eval_loss': 0.4551176428794861, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.85s/it]

{'eval_loss': 0.43754643201828003, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.131, 'eval_samples_per_second': 7.634, 'eval_steps_per_second': 7.634, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.6971, 'train_samples_per_second': 4.074, 'train_steps_per_second': 0.509, 'train_loss': 0.48619821336534286, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.67it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 24007.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 93.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6592231392860413, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.86s/it]

{'eval_loss': 0.4551176428794861, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.647, 'eval_steps_per_second': 11.647, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.81s/it]

{'eval_loss': 0.43754643201828003, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.907, 'eval_steps_per_second': 11.907, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.701, 'train_samples_per_second': 4.073, 'train_steps_per_second': 0.509, 'train_loss': 0.48619821336534286, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 131.94it/s]
100%|██████████| 1/1 [00:00<00:00, 141.12it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 19206.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.39 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 88.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 243.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 196.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6592231392860413, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.81s/it]

{'eval_loss': 0.4551176428794861, 'eval_precision': 0.16666666666666666, 'eval_recall': 0.14285714285714285, 'eval_f1': 0.15384615384615383, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.22222222222222224, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.89s/it]

{'eval_loss': 0.43754643201828003, 'eval_precision': 0.2222222222222222, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.25, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.1319, 'train_samples_per_second': 4.049, 'train_steps_per_second': 0.506, 'train_loss': 0.48619821336534286, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.28it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 24245.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 91.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5925356149673462, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:46<00:19,  1.47s/it]

{'eval_loss': 0.43669843673706055, 'eval_precision': 0.25, 'eval_recall': 0.2857142857142857, 'eval_f1': 0.26666666666666666, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.36363636363636365, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.297, 'eval_steps_per_second': 11.297, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:11<00:00,  1.52s/it]

{'eval_loss': 0.42211973667144775, 'eval_precision': 0.4444444444444444, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.5, 'eval_accuracy': 0.9523809523809523, 'eval_span_f1': 0.6153846153846153, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.255, 'eval_steps_per_second': 11.255, 'epoch': 3.0}


100%|██████████| 39/39 [01:12<00:00,  1.86s/it]


{'train_runtime': 72.6475, 'train_samples_per_second': 4.006, 'train_steps_per_second': 0.537, 'train_loss': 0.4547953972449669, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 76.55it/s]
100%|██████████| 1/1 [00:00<00:00, 66.13it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 19200.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 90.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 277.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to b

{'eval_loss': 0.826301634311676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.383, 'eval_steps_per_second': 11.383, 'epoch': 1.0}


 67%|██████▋   | 26/39 [00:48<00:19,  1.53s/it]
 67%|██████▋   | 26/39 [00:48<00:19,  1.53s/it]

{'eval_loss': 0.7221124768257141, 'eval_precision': 0.6, 'eval_recall': 0.42857142857142855, 'eval_f1': 0.5, 'eval_accuracy': 0.9365079365079365, 'eval_span_f1': 0.4, 'eval_runtime': 0.1201, 'eval_samples_per_second': 8.329, 'eval_steps_per_second': 8.329, 'epoch': 2.0}


100%|██████████| 39/39 [01:11<00:00,  1.48s/it]
100%|██████████| 39/39 [01:11<00:00,  1.48s/it]

{'eval_loss': 0.6790327429771423, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.5454545454545454, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.577, 'eval_steps_per_second': 11.577, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.88s/it]


{'train_runtime': 73.3442, 'train_samples_per_second': 3.968, 'train_steps_per_second': 0.532, 'train_loss': 0.4419532678066156, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 68.23it/s]
100%|██████████| 1/1 [00:00<00:00, 65.89it/s]
100%|██████████| 1/1 [00:00<00:00, 176.38it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 23888.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 89.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _war

{'eval_loss': 0.826301634311676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.432, 'eval_steps_per_second': 12.432, 'epoch': 1.0}


 67%|██████▋   | 26/39 [00:47<00:19,  1.53s/it]
 67%|██████▋   | 26/39 [00:47<00:19,  1.53s/it]

{'eval_loss': 0.7221124768257141, 'eval_precision': 0.6, 'eval_recall': 0.42857142857142855, 'eval_f1': 0.5, 'eval_accuracy': 0.9365079365079365, 'eval_span_f1': 0.4, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 2.0}


100%|██████████| 39/39 [01:11<00:00,  1.50s/it]
100%|██████████| 39/39 [01:11<00:00,  1.50s/it]

{'eval_loss': 0.6790327429771423, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.5454545454545454, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.87s/it]


{'train_runtime': 73.0691, 'train_samples_per_second': 3.983, 'train_steps_per_second': 0.534, 'train_loss': 0.4419532678066156, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 164.51it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 23911.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 90.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _wa

{'eval_loss': 0.826301634311676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 1.0}


 67%|██████▋   | 26/39 [00:46<00:19,  1.48s/it]
 67%|██████▋   | 26/39 [00:46<00:19,  1.48s/it]

{'eval_loss': 0.7221124768257141, 'eval_precision': 0.6, 'eval_recall': 0.42857142857142855, 'eval_f1': 0.5, 'eval_accuracy': 0.9365079365079365, 'eval_span_f1': 0.4, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 2.0}


100%|██████████| 39/39 [01:11<00:00,  1.52s/it]
100%|██████████| 39/39 [01:11<00:00,  1.52s/it]

{'eval_loss': 0.6790327429771423, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.5454545454545454, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 39/39 [01:12<00:00,  1.86s/it]


{'train_runtime': 72.6439, 'train_samples_per_second': 4.006, 'train_steps_per_second': 0.537, 'train_loss': 0.4419532678066156, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 32340.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 90.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 196.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _warn_prf(a

{'eval_loss': 0.826301634311676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9126984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.534, 'eval_steps_per_second': 12.534, 'epoch': 1.0}


 67%|██████▋   | 26/39 [00:47<00:19,  1.48s/it]
 67%|██████▋   | 26/39 [00:47<00:19,  1.48s/it]

{'eval_loss': 0.7221124768257141, 'eval_precision': 0.6, 'eval_recall': 0.42857142857142855, 'eval_f1': 0.5, 'eval_accuracy': 0.9365079365079365, 'eval_span_f1': 0.4, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 2.0}


100%|██████████| 39/39 [01:11<00:00,  1.52s/it]
100%|██████████| 39/39 [01:12<00:00,  1.52s/it]

{'eval_loss': 0.6790327429771423, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.5714285714285714, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.5454545454545454, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.241, 'eval_steps_per_second': 11.241, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.88s/it]


{'train_runtime': 73.3512, 'train_samples_per_second': 3.967, 'train_steps_per_second': 0.532, 'train_loss': 0.4419532678066156, 'epoch': 3.0}


100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 152.35it/s]
100%|██████████| 1/1 [00:00<00:00, 165.22it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _warn

{'eval_loss': 2.073737144470215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4444444444444444, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.253, 'eval_steps_per_second': 11.253, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.53s/it]

{'eval_loss': 1.7278363704681396, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8650793650793651, 'eval_span_f1': 0.0, 'eval_runtime': 0.0974, 'eval_samples_per_second': 10.262, 'eval_steps_per_second': 10.262, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.81s/it]

{'eval_loss': 1.563873291015625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.258, 'eval_steps_per_second': 11.258, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.12s/it]


{'train_runtime': 6.3512, 'train_samples_per_second': 0.472, 'train_steps_per_second': 0.472, 'train_loss': 2.069930076599121, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 234.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 1.984977126121521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.512, 'eval_steps_per_second': 11.512, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.62s/it]

{'eval_loss': 1.5946296453475952, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.784, 'eval_steps_per_second': 11.784, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.88s/it]

{'eval_loss': 1.3972376585006714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0931, 'eval_samples_per_second': 10.737, 'eval_steps_per_second': 10.737, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.19s/it]


{'train_runtime': 6.5827, 'train_samples_per_second': 0.456, 'train_steps_per_second': 0.456, 'train_loss': 1.742810885111491, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 141.07it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 1.984977126121521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.58s/it]

{'eval_loss': 1.5946296453475952, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.874, 'eval_steps_per_second': 10.874, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.83s/it]

{'eval_loss': 1.3972376585006714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0919, 'eval_samples_per_second': 10.883, 'eval_steps_per_second': 10.883, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.16s/it]


{'train_runtime': 6.4816, 'train_samples_per_second': 0.463, 'train_steps_per_second': 0.463, 'train_loss': 1.742810885111491, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.21it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.984977126121521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.797, 'eval_steps_per_second': 11.797, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.59s/it]

{'eval_loss': 1.5946296453475952, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.349, 'eval_steps_per_second': 11.349, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.85s/it]

{'eval_loss': 1.3972376585006714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.037, 'eval_steps_per_second': 13.037, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.17s/it]


{'train_runtime': 6.5081, 'train_samples_per_second': 0.461, 'train_steps_per_second': 0.461, 'train_loss': 1.742810885111491, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.984977126121521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.626984126984127, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.361, 'eval_steps_per_second': 13.361, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:02<00:01,  1.57s/it]

{'eval_loss': 1.5946296453475952, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0969, 'eval_samples_per_second': 10.315, 'eval_steps_per_second': 10.315, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  1.85s/it]

{'eval_loss': 1.3972376585006714, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.129, 'eval_steps_per_second': 11.129, 'epoch': 3.0}


100%|██████████| 3/3 [00:06<00:00,  2.17s/it]


{'train_runtime': 6.502, 'train_samples_per_second': 0.461, 'train_steps_per_second': 0.461, 'train_loss': 1.742810885111491, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 980.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 330.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 1.9840054512023926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0884, 'eval_samples_per_second': 11.308, 'eval_steps_per_second': 11.308, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.73s/it]

{'eval_loss': 1.593544840812683, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.02s/it]

{'eval_loss': 1.403480887413025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.842, 'eval_steps_per_second': 12.842, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.35s/it]


{'train_runtime': 7.0358, 'train_samples_per_second': 0.853, 'train_steps_per_second': 0.426, 'train_loss': 1.7178888320922852, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 1998.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 124.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.9840054512023926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.924, 'eval_steps_per_second': 12.924, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:16<00:09,  9.64s/it]

{'eval_loss': 1.593544840812683, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.994, 'eval_steps_per_second': 10.994, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:19<00:00,  6.33s/it]

{'eval_loss': 1.403480887413025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.845, 'eval_steps_per_second': 12.845, 'epoch': 3.0}


100%|██████████| 3/3 [00:20<00:00,  6.82s/it]


{'train_runtime': 20.4738, 'train_samples_per_second': 0.293, 'train_steps_per_second': 0.147, 'train_loss': 1.7178888320922852, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 1938.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 484.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 330.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.9840054512023926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.204, 'eval_steps_per_second': 13.204, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.75s/it]

{'eval_loss': 1.593544840812683, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.534, 'eval_steps_per_second': 12.534, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.05s/it]

{'eval_loss': 1.403480887413025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.012, 'eval_steps_per_second': 13.012, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.35s/it]


{'train_runtime': 7.0583, 'train_samples_per_second': 0.85, 'train_steps_per_second': 0.425, 'train_loss': 1.7178888320922852, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 1996.81 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 331.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.9840054512023926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.867, 'eval_steps_per_second': 11.867, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.79s/it]

{'eval_loss': 1.593544840812683, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.883, 'eval_steps_per_second': 11.883, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:05<00:00,  2.05s/it]

{'eval_loss': 1.403480887413025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.101, 'eval_samples_per_second': 9.905, 'eval_steps_per_second': 9.905, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.38s/it]


{'train_runtime': 7.1405, 'train_samples_per_second': 0.84, 'train_steps_per_second': 0.42, 'train_loss': 1.7178888320922852, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 137.64it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 268.11it/s]
Casting the dataset: 100%|██████████| 2/2 [00:00<00:00, 2000.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 985.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 637.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 128.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 223.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probabl

{'eval_loss': 1.9840054512023926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6031746031746031, 'eval_span_f1': 0.0, 'eval_runtime': 0.0957, 'eval_samples_per_second': 10.452, 'eval_steps_per_second': 10.452, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.35s/it]

{'eval_loss': 1.593544840812683, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1038, 'eval_samples_per_second': 9.635, 'eval_steps_per_second': 9.635, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:18<00:00,  7.80s/it]

{'eval_loss': 1.403480887413025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0793, 'eval_samples_per_second': 12.611, 'eval_steps_per_second': 12.611, 'epoch': 3.0}


100%|██████████| 3/3 [00:19<00:00,  6.66s/it]


{'train_runtime': 19.969, 'train_samples_per_second': 0.3, 'train_steps_per_second': 0.15, 'train_loss': 1.7178888320922852, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.42it/s]
100%|██████████| 1/1 [00:00<00:00, 145.44it/s]
100%|██████████| 1/1 [00:00<00:00, 148.69it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 780.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 126.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN th

{'eval_loss': 1.9786299467086792, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6190476190476191, 'eval_span_f1': 0.0, 'eval_runtime': 0.1073, 'eval_samples_per_second': 9.316, 'eval_steps_per_second': 9.316, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:02,  2.01s/it]

{'eval_loss': 1.5901395082473755, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0924, 'eval_samples_per_second': 10.826, 'eval_steps_per_second': 10.826, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.31s/it]

{'eval_loss': 1.401063084602356, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0903, 'eval_samples_per_second': 11.068, 'eval_steps_per_second': 11.068, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.70s/it]


{'train_runtime': 8.1086, 'train_samples_per_second': 1.11, 'train_steps_per_second': 0.37, 'train_loss': 1.7015784581502278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 167.93it/s]
100%|██████████| 1/1 [00:00<00:00, 577.57it/s]
100%|██████████| 1/1 [00:00<00:00, 150.59it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 2597.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 219.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 379.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 267.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 142.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 145.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN th

{'eval_loss': 1.9786299467086792, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6190476190476191, 'eval_span_f1': 0.0, 'eval_runtime': 0.094, 'eval_samples_per_second': 10.635, 'eval_steps_per_second': 10.635, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.17s/it]

{'eval_loss': 1.5901395082473755, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.701, 'eval_steps_per_second': 11.701, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.41s/it]

{'eval_loss': 1.401063084602356, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.66, 'eval_steps_per_second': 11.66, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.80s/it]


{'train_runtime': 8.4069, 'train_samples_per_second': 1.071, 'train_steps_per_second': 0.357, 'train_loss': 1.7015784581502278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 156.67it/s]
100%|██████████| 1/1 [00:00<00:00, 144.44it/s]
100%|██████████| 1/1 [00:00<00:00, 169.30it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 4204.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 559.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 459.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model 

{'eval_loss': 1.9786299467086792, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6190476190476191, 'eval_span_f1': 0.0, 'eval_runtime': 0.1129, 'eval_samples_per_second': 8.857, 'eval_steps_per_second': 8.857, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.57s/it]

{'eval_loss': 1.5901395082473755, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1221, 'eval_samples_per_second': 8.189, 'eval_steps_per_second': 8.189, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:18<00:00,  7.80s/it]

{'eval_loss': 1.401063084602356, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.097, 'eval_steps_per_second': 11.097, 'epoch': 3.0}


100%|██████████| 3/3 [00:20<00:00,  6.73s/it]


{'train_runtime': 20.1935, 'train_samples_per_second': 0.446, 'train_steps_per_second': 0.149, 'train_loss': 1.7015784581502278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 149.35it/s]
100%|██████████| 1/1 [00:00<00:00, 101.26it/s]
100%|██████████| 1/1 [00:00<00:00, 142.41it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 2582.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 146.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 430.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 126.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 115.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN th

{'eval_loss': 1.9786299467086792, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6190476190476191, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.002, 'eval_steps_per_second': 12.002, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:03<00:01,  1.99s/it]

{'eval_loss': 1.5901395082473755, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.094, 'eval_samples_per_second': 10.639, 'eval_steps_per_second': 10.639, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.24s/it]

{'eval_loss': 1.401063084602356, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.702, 'eval_steps_per_second': 11.702, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.56s/it]


{'train_runtime': 7.6694, 'train_samples_per_second': 1.173, 'train_steps_per_second': 0.391, 'train_loss': 1.7015784581502278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 138.16it/s]
100%|██████████| 1/1 [00:00<00:00, 143.44it/s]
100%|██████████| 1/1 [00:00<00:00, 237.48it/s]
Casting the dataset: 100%|██████████| 3/3 [00:00<00:00, 3002.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 431.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 101.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream t

{'eval_loss': 1.9786299467086792, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.6190476190476191, 'eval_span_f1': 0.0, 'eval_runtime': 0.108, 'eval_samples_per_second': 9.259, 'eval_steps_per_second': 9.259, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.12s/it]

{'eval_loss': 1.5901395082473755, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1269, 'eval_samples_per_second': 7.881, 'eval_steps_per_second': 7.881, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.32s/it]

{'eval_loss': 1.401063084602356, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0959, 'eval_samples_per_second': 10.426, 'eval_steps_per_second': 10.426, 'epoch': 3.0}


100%|██████████| 3/3 [00:07<00:00,  2.66s/it]


{'train_runtime': 7.9852, 'train_samples_per_second': 1.127, 'train_steps_per_second': 0.376, 'train_loss': 1.7015784581502278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.54it/s]
100%|██████████| 1/1 [00:00<00:00, 149.08it/s]
100%|██████████| 1/1 [00:00<00:00, 165.40it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 510.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 148.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream ta

{'eval_loss': 1.9224956035614014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7222222222222222, 'eval_span_f1': 0.0, 'eval_runtime': 0.0902, 'eval_samples_per_second': 11.08, 'eval_steps_per_second': 11.08, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.15s/it]

{'eval_loss': 1.7296777963638306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0943, 'eval_samples_per_second': 10.603, 'eval_steps_per_second': 10.603, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.40s/it]

{'eval_loss': 1.3583303689956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 11.004, 'eval_steps_per_second': 11.004, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.72s/it]


{'train_runtime': 8.156, 'train_samples_per_second': 1.471, 'train_steps_per_second': 0.368, 'train_loss': 1.7461249033610027, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.61it/s]
100%|██████████| 1/1 [00:00<00:00, 143.61it/s]
100%|██████████| 1/1 [00:00<00:00, 148.25it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 1821.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 517.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 428.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 3339.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.9224956035614014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7222222222222222, 'eval_span_f1': 0.0, 'eval_runtime': 0.097, 'eval_samples_per_second': 10.307, 'eval_steps_per_second': 10.307, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.12s/it]

{'eval_loss': 1.7296777963638306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.348, 'eval_steps_per_second': 11.348, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.45s/it]

{'eval_loss': 1.3583303689956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0901, 'eval_samples_per_second': 11.096, 'eval_steps_per_second': 11.096, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.76s/it]


{'train_runtime': 8.2872, 'train_samples_per_second': 1.448, 'train_steps_per_second': 0.362, 'train_loss': 1.7461249033610027, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.24it/s]
100%|██████████| 1/1 [00:00<00:00, 147.87it/s]
100%|██████████| 1/1 [00:00<00:00, 216.16it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 3908.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 574.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 115.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 583.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model

{'eval_loss': 1.9224956035614014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7222222222222222, 'eval_span_f1': 0.0, 'eval_runtime': 0.0883, 'eval_samples_per_second': 11.33, 'eval_steps_per_second': 11.33, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.11s/it]

{'eval_loss': 1.7296777963638306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.1297, 'eval_samples_per_second': 7.708, 'eval_steps_per_second': 7.708, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.35s/it]

{'eval_loss': 1.3583303689956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0872, 'eval_samples_per_second': 11.462, 'eval_steps_per_second': 11.462, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.67s/it]


{'train_runtime': 8.0114, 'train_samples_per_second': 1.498, 'train_steps_per_second': 0.374, 'train_loss': 1.7461249033610027, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 114.81it/s]
100%|██████████| 1/1 [00:00<00:00, 143.38it/s]
100%|██████████| 1/1 [00:00<00:00, 100.94it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 2064.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1218.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 408.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 253.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 124.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 341.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probab

{'eval_loss': 1.9224956035614014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7222222222222222, 'eval_span_f1': 0.0, 'eval_runtime': 0.0736, 'eval_samples_per_second': 13.58, 'eval_steps_per_second': 13.58, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.14s/it]

{'eval_loss': 1.7296777963638306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.124, 'eval_steps_per_second': 11.124, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.39s/it]

{'eval_loss': 1.3583303689956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.053, 'eval_steps_per_second': 12.053, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.71s/it]


{'train_runtime': 8.1307, 'train_samples_per_second': 1.476, 'train_steps_per_second': 0.369, 'train_loss': 1.7461249033610027, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.20it/s]
100%|██████████| 1/1 [00:00<00:00, 165.51it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 4/4 [00:00<00:00, 4002.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 442.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.9224956035614014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7222222222222222, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.245, 'eval_steps_per_second': 11.245, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.12s/it]

{'eval_loss': 1.7296777963638306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.1009, 'eval_samples_per_second': 9.911, 'eval_steps_per_second': 9.911, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:06<00:00,  2.42s/it]

{'eval_loss': 1.3583303689956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 11.001, 'eval_steps_per_second': 11.001, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.76s/it]


{'train_runtime': 8.2818, 'train_samples_per_second': 1.449, 'train_steps_per_second': 0.362, 'train_loss': 1.7461249033610027, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.29it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 163.76it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 4992.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.79 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 452.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1515932083129883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.381, 'eval_steps_per_second': 11.381, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.23s/it]

{'eval_loss': 1.8783804178237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.247, 'eval_steps_per_second': 11.247, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.55s/it]

{'eval_loss': 1.7278752326965332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.754, 'eval_steps_per_second': 10.754, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.85s/it]


{'train_runtime': 8.5483, 'train_samples_per_second': 1.755, 'train_steps_per_second': 0.351, 'train_loss': 1.935574213663737, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
100%|██████████| 1/1 [00:00<00:00, 76.53it/s]
100%|██████████| 1/1 [00:00<00:00, 89.95it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 4992.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 498.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.1515932083129883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.195, 'eval_steps_per_second': 12.195, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.25s/it]

{'eval_loss': 1.8783804178237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.998, 'eval_steps_per_second': 10.998, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.54s/it]

{'eval_loss': 1.7278752326965332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.526, 'eval_steps_per_second': 11.526, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.87s/it]


{'train_runtime': 8.5983, 'train_samples_per_second': 1.745, 'train_steps_per_second': 0.349, 'train_loss': 1.935574213663737, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 4995.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 498.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1515932083129883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.009, 'eval_steps_per_second': 13.009, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.21s/it]

{'eval_loss': 1.8783804178237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.088, 'eval_steps_per_second': 12.088, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.51s/it]

{'eval_loss': 1.7278752326965332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0926, 'eval_samples_per_second': 10.798, 'eval_steps_per_second': 10.798, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.83s/it]


{'train_runtime': 8.4836, 'train_samples_per_second': 1.768, 'train_steps_per_second': 0.354, 'train_loss': 1.935574213663737, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 5001.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 497.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1515932083129883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.667, 'eval_steps_per_second': 12.667, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.24s/it]

{'eval_loss': 1.8783804178237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.53, 'eval_steps_per_second': 12.53, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.53s/it]

{'eval_loss': 1.7278752326965332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.158, 'eval_steps_per_second': 11.158, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.85s/it]


{'train_runtime': 8.5416, 'train_samples_per_second': 1.756, 'train_steps_per_second': 0.351, 'train_loss': 1.935574213663737, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
100%|██████████| 1/1 [00:00<00:00, 123.99it/s]
100%|██████████| 1/1 [00:00<00:00, 164.80it/s]
Casting the dataset: 100%|██████████| 5/5 [00:00<00:00, 5005.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 452.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 2.1515932083129883, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.373015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.014, 'eval_steps_per_second': 13.014, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.22s/it]

{'eval_loss': 1.8783804178237915, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.746031746031746, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.943, 'eval_steps_per_second': 11.943, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.54s/it]

{'eval_loss': 1.7278752326965332, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.85s/it]


{'train_runtime': 8.559, 'train_samples_per_second': 1.753, 'train_steps_per_second': 0.351, 'train_loss': 1.935574213663737, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.28it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 5997.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.22 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 498.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shou

{'eval_loss': 2.142003297805786, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.40476190476190477, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.513, 'eval_steps_per_second': 11.513, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.41s/it]

{'eval_loss': 1.8609869480133057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0969, 'eval_samples_per_second': 10.321, 'eval_steps_per_second': 10.321, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.67s/it]

{'eval_loss': 1.7172540426254272, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.121, 'eval_steps_per_second': 11.121, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  3.00s/it]


{'train_runtime': 8.9945, 'train_samples_per_second': 2.001, 'train_steps_per_second': 0.334, 'train_loss': 1.9353424708048503, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.28it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 5993.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 543.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 2.142003297805786, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.40476190476190477, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.38s/it]

{'eval_loss': 1.8609869480133057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0929, 'eval_samples_per_second': 10.763, 'eval_steps_per_second': 10.763, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.67s/it]

{'eval_loss': 1.7172540426254272, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.502, 'eval_steps_per_second': 11.502, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  3.00s/it]


{'train_runtime': 8.9904, 'train_samples_per_second': 2.002, 'train_steps_per_second': 0.334, 'train_loss': 1.9353424708048503, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 6009.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 496.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 494.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 2.142003297805786, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.40476190476190477, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.016, 'eval_steps_per_second': 13.016, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.40s/it]

{'eval_loss': 1.8609869480133057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.521, 'eval_steps_per_second': 11.521, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.66s/it]

{'eval_loss': 1.7172540426254272, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.933, 'eval_steps_per_second': 11.933, 'epoch': 3.0}


100%|██████████| 3/3 [00:08<00:00,  2.99s/it]


{'train_runtime': 8.9553, 'train_samples_per_second': 2.01, 'train_steps_per_second': 0.335, 'train_loss': 1.9353424708048503, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 5991.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 597.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initial

{'eval_loss': 2.142003297805786, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.40476190476190477, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.846, 'eval_steps_per_second': 12.846, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.41s/it]

{'eval_loss': 1.8609869480133057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.937, 'eval_steps_per_second': 11.937, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.70s/it]

{'eval_loss': 1.7172540426254272, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.262, 'eval_steps_per_second': 11.262, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.03s/it]


{'train_runtime': 9.0878, 'train_samples_per_second': 1.981, 'train_steps_per_second': 0.33, 'train_loss': 1.9353424708048503, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
100%|██████████| 1/1 [00:00<00:00, 123.90it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.17it/s]
Casting the dataset: 100%|██████████| 6/6 [00:00<00:00, 5997.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 497.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 2.142003297805786, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.40476190476190477, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.019, 'eval_steps_per_second': 13.019, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.41s/it]

{'eval_loss': 1.8609869480133057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.753968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.489, 'eval_steps_per_second': 11.489, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:07<00:00,  2.69s/it]

{'eval_loss': 1.7172540426254272, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8492063492063492, 'eval_span_f1': 0.0, 'eval_runtime': 0.0964, 'eval_samples_per_second': 10.376, 'eval_steps_per_second': 10.376, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.03s/it]


{'train_runtime': 9.0916, 'train_samples_per_second': 1.98, 'train_steps_per_second': 0.33, 'train_loss': 1.9353424708048503, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.61it/s]
100%|██████████| 1/1 [00:00<00:00, 141.85it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3499.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 534.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 2.1110546588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4444444444444444, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.986, 'eval_steps_per_second': 11.986, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.57s/it]

{'eval_loss': 1.806717872619629, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.1009, 'eval_samples_per_second': 9.908, 'eval_steps_per_second': 9.908, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.84s/it]

{'eval_loss': 1.6704820394515991, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0898, 'eval_samples_per_second': 11.138, 'eval_steps_per_second': 11.138, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.17s/it]


{'train_runtime': 9.5112, 'train_samples_per_second': 2.208, 'train_steps_per_second': 0.315, 'train_loss': 1.9219452540079753, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 70.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3424.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 433.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should 

{'eval_loss': 2.1110546588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4444444444444444, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.52s/it]

{'eval_loss': 1.806717872619629, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.565, 'eval_steps_per_second': 11.565, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.80s/it]

{'eval_loss': 1.6704820394515991, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.986, 'eval_steps_per_second': 11.986, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.14s/it]


{'train_runtime': 9.4176, 'train_samples_per_second': 2.23, 'train_steps_per_second': 0.319, 'train_loss': 1.9219452540079753, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.02it/s]
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.76it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 3503.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 533.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1110546588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4444444444444444, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.86, 'eval_steps_per_second': 11.86, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.51s/it]

{'eval_loss': 1.806717872619629, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0918, 'eval_samples_per_second': 10.89, 'eval_steps_per_second': 10.89, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.85s/it]

{'eval_loss': 1.6704820394515991, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.228, 'eval_steps_per_second': 12.228, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.15s/it]


{'train_runtime': 9.4607, 'train_samples_per_second': 2.22, 'train_steps_per_second': 0.317, 'train_loss': 1.9219452540079753, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.84it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 6992.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.99 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 496.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1110546588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4444444444444444, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.686, 'eval_steps_per_second': 12.686, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:04<00:02,  2.54s/it]

{'eval_loss': 1.806717872619629, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.1001, 'eval_samples_per_second': 9.992, 'eval_steps_per_second': 9.992, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.83s/it]

{'eval_loss': 1.6704820394515991, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.936, 'eval_steps_per_second': 11.936, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.15s/it]


{'train_runtime': 9.4639, 'train_samples_per_second': 2.219, 'train_steps_per_second': 0.317, 'train_loss': 1.9219452540079753, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 149.14it/s]
Casting the dataset: 100%|██████████| 7/7 [00:00<00:00, 7000.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 486.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 535.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1110546588897705, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4444444444444444, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.86, 'eval_steps_per_second': 12.86, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.61s/it]

{'eval_loss': 1.806717872619629, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7936507936507936, 'eval_span_f1': 0.0, 'eval_runtime': 0.0939, 'eval_samples_per_second': 10.645, 'eval_steps_per_second': 10.645, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.85s/it]

{'eval_loss': 1.6704820394515991, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.448, 'eval_steps_per_second': 11.448, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.20s/it]


{'train_runtime': 9.6045, 'train_samples_per_second': 2.186, 'train_steps_per_second': 0.312, 'train_loss': 1.9219452540079753, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 164.44it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 3999.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.39 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 394.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 2.1042795181274414, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4603174603174603, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.68s/it]

{'eval_loss': 1.8090118169784546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.1064, 'eval_samples_per_second': 9.396, 'eval_steps_per_second': 9.396, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.99s/it]

{'eval_loss': 1.6454102993011475, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.872, 'eval_steps_per_second': 10.872, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.32s/it]


{'train_runtime': 9.9642, 'train_samples_per_second': 2.409, 'train_steps_per_second': 0.301, 'train_loss': 1.9013573328653972, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
100%|██████████| 1/1 [00:00<00:00, 76.17it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 3918.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 468.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 320.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 381.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 2.1042795181274414, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4603174603174603, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.354, 'eval_steps_per_second': 12.354, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.74s/it]

{'eval_loss': 1.8090118169784546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0969, 'eval_samples_per_second': 10.324, 'eval_steps_per_second': 10.324, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.01s/it]

{'eval_loss': 1.6454102993011475, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.514, 'eval_steps_per_second': 11.514, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.37s/it]


{'train_runtime': 10.1132, 'train_samples_per_second': 2.373, 'train_steps_per_second': 0.297, 'train_loss': 1.9013573328653972, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 7722.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.91 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 376.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 2.1042795181274414, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4603174603174603, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.023, 'eval_steps_per_second': 13.023, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.76s/it]

{'eval_loss': 1.8090118169784546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.234, 'eval_steps_per_second': 12.234, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  3.04s/it]

{'eval_loss': 1.6454102993011475, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.116, 'eval_steps_per_second': 11.116, 'epoch': 3.0}


100%|██████████| 3/3 [00:10<00:00,  3.38s/it]


{'train_runtime': 10.1258, 'train_samples_per_second': 2.37, 'train_steps_per_second': 0.296, 'train_loss': 1.9013573328653972, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.04it/s]
100%|██████████| 1/1 [00:00<00:00, 31.79it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.37it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 7994.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 530.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 2.1042795181274414, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4603174603174603, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.70s/it]

{'eval_loss': 1.8090118169784546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.121, 'eval_steps_per_second': 12.121, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.97s/it]

{'eval_loss': 1.6454102993011475, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.31s/it]


{'train_runtime': 9.919, 'train_samples_per_second': 2.42, 'train_steps_per_second': 0.302, 'train_loss': 1.9013573328653972, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 8/8 [00:00<00:00, 7989.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 442.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initializ

{'eval_loss': 2.1042795181274414, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.4603174603174603, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.719, 'eval_steps_per_second': 11.719, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 2/3 [00:05<00:02,  2.72s/it]

{'eval_loss': 1.8090118169784546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7777777777777778, 'eval_span_f1': 0.0, 'eval_runtime': 0.0905, 'eval_samples_per_second': 11.052, 'eval_steps_per_second': 11.052, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 3/3 [00:08<00:00,  2.99s/it]

{'eval_loss': 1.6454102993011475, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.873015873015873, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 3.0}


100%|██████████| 3/3 [00:09<00:00,  3.32s/it]


{'train_runtime': 9.9471, 'train_samples_per_second': 2.413, 'train_steps_per_second': 0.302, 'train_loss': 1.9013573328653972, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.63it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 8992.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 447.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initiali

{'eval_loss': 1.7495819330215454, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8253968253968254, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.168, 'eval_steps_per_second': 11.168, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.60s/it]

{'eval_loss': 1.2228058576583862, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.209, 'eval_steps_per_second': 12.209, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.68s/it]

{'eval_loss': 0.990858256816864, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.138, 'eval_steps_per_second': 12.138, 'epoch': 3.0}


100%|██████████| 6/6 [00:11<00:00,  1.99s/it]


{'train_runtime': 11.9291, 'train_samples_per_second': 2.263, 'train_steps_per_second': 0.503, 'train_loss': 1.3986679712931316, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 140.68it/s]
100%|██████████| 1/1 [00:00<00:00, 163.81it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 8577.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 448.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.507007360458374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.561, 'eval_steps_per_second': 11.561, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.63s/it]

{'eval_loss': 0.9991955757141113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.927, 'eval_steps_per_second': 12.927, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.76s/it]

{'eval_loss': 0.9257689714431763, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.07s/it]


{'train_runtime': 12.4481, 'train_samples_per_second': 2.169, 'train_steps_per_second': 0.482, 'train_loss': 1.2470698356628418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 8996.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 471.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 242.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.507007360458374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.59s/it]

{'eval_loss': 0.9991955757141113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.069, 'eval_steps_per_second': 12.069, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:10<00:00,  1.72s/it]

{'eval_loss': 0.9257689714431763, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.646, 'eval_steps_per_second': 11.646, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.03s/it]


{'train_runtime': 12.1813, 'train_samples_per_second': 2.217, 'train_steps_per_second': 0.493, 'train_loss': 1.2470698356628418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 166.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 4499.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 427.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should

{'eval_loss': 1.507007360458374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.173, 'eval_steps_per_second': 11.173, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.60s/it]

{'eval_loss': 0.9991955757141113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.78s/it]

{'eval_loss': 0.9257689714431763, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.116, 'eval_samples_per_second': 8.617, 'eval_steps_per_second': 8.617, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.09s/it]


{'train_runtime': 12.5219, 'train_samples_per_second': 2.156, 'train_steps_per_second': 0.479, 'train_loss': 1.2470698356628418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
Casting the dataset: 100%|██████████| 9/9 [00:00<00:00, 4497.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 425.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You shoul

{'eval_loss': 1.507007360458374, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.865, 'eval_steps_per_second': 10.865, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.67s/it]

{'eval_loss': 0.9991955757141113, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0906, 'eval_samples_per_second': 11.042, 'eval_steps_per_second': 11.042, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.76s/it]

{'eval_loss': 0.9257689714431763, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.10s/it]


{'train_runtime': 12.5786, 'train_samples_per_second': 2.147, 'train_steps_per_second': 0.477, 'train_loss': 1.2470698356628418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 70.80it/s]
100%|██████████| 1/1 [00:00<00:00, 71.01it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 10005.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.08 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 552.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 1.4853298664093018, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.228, 'eval_steps_per_second': 12.228, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:06<00:03,  1.68s/it]

{'eval_loss': 1.0238791704177856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.523, 'eval_steps_per_second': 11.523, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.82s/it]

{'eval_loss': 0.9006921648979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.11s/it]


{'train_runtime': 12.6388, 'train_samples_per_second': 2.374, 'train_steps_per_second': 0.475, 'train_loss': 1.2238384087880452, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.00it/s]
100%|██████████| 1/1 [00:00<00:00, 123.96it/s]
100%|██████████| 1/1 [00:00<00:00, 166.75it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 4997.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 398.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.4853298664093018, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.68s/it]

{'eval_loss': 1.0238791704177856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.82s/it]

{'eval_loss': 0.9006921648979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.935, 'eval_steps_per_second': 11.935, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.11s/it]


{'train_runtime': 12.6723, 'train_samples_per_second': 2.367, 'train_steps_per_second': 0.473, 'train_loss': 1.2238384087880452, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 9615.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 353.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.4853298664093018, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.038, 'eval_steps_per_second': 12.038, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.70s/it]

{'eval_loss': 1.0238791704177856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.196, 'eval_steps_per_second': 13.196, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.81s/it]

{'eval_loss': 0.9006921648979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.209, 'eval_steps_per_second': 12.209, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.11s/it]


{'train_runtime': 12.6403, 'train_samples_per_second': 2.373, 'train_steps_per_second': 0.475, 'train_loss': 1.2238384087880452, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.52it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 4999.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 432.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.4853298664093018, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.788, 'eval_steps_per_second': 11.788, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.71s/it]

{'eval_loss': 1.0238791704177856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.515, 'eval_steps_per_second': 11.515, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.82s/it]

{'eval_loss': 0.9006921648979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.13s/it]


{'train_runtime': 12.7873, 'train_samples_per_second': 2.346, 'train_steps_per_second': 0.469, 'train_loss': 1.2238384087880452, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.22it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 10/10 [00:00<00:00, 9573.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 414.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.4853298664093018, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.70s/it]

{'eval_loss': 1.0238791704177856, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.82s/it]

{'eval_loss': 0.9006921648979187, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.646, 'eval_steps_per_second': 11.646, 'epoch': 3.0}


100%|██████████| 6/6 [00:12<00:00,  2.11s/it]


{'train_runtime': 12.6697, 'train_samples_per_second': 2.368, 'train_steps_per_second': 0.474, 'train_loss': 1.2238384087880452, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 161.28it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 11016.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 376.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.4764426946640015, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.751, 'eval_steps_per_second': 10.751, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.82s/it]

{'eval_loss': 1.007729411125183, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.446, 'eval_steps_per_second': 11.446, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.94s/it]

{'eval_loss': 0.883684515953064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.507, 'eval_steps_per_second': 11.507, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.23s/it]


{'train_runtime': 13.3529, 'train_samples_per_second': 2.471, 'train_steps_per_second': 0.449, 'train_loss': 1.2165114879608154, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.76it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 11024.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 365.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 1.4764426946640015, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.77s/it]

{'eval_loss': 1.007729411125183, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.269, 'eval_steps_per_second': 12.269, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.89s/it]

{'eval_loss': 0.883684515953064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.17s/it]


{'train_runtime': 13.02, 'train_samples_per_second': 2.535, 'train_steps_per_second': 0.461, 'train_loss': 1.2165114879608154, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.22it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 10998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 341.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.4764426946640015, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.80s/it]

{'eval_loss': 1.007729411125183, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0784, 'eval_samples_per_second': 12.753, 'eval_steps_per_second': 12.753, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.90s/it]

{'eval_loss': 0.883684515953064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1375, 'eval_samples_per_second': 7.272, 'eval_steps_per_second': 7.272, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.19s/it]


{'train_runtime': 13.1523, 'train_samples_per_second': 2.509, 'train_steps_per_second': 0.456, 'train_loss': 1.2165114879608154, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 10987.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.22 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 377.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.4764426946640015, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.487, 'eval_steps_per_second': 11.487, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.78s/it]

{'eval_loss': 1.007729411125183, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.647, 'eval_steps_per_second': 11.647, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.91s/it]

{'eval_loss': 0.883684515953064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.771, 'eval_steps_per_second': 11.771, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.19s/it]


{'train_runtime': 13.1561, 'train_samples_per_second': 2.508, 'train_steps_per_second': 0.456, 'train_loss': 1.2165114879608154, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 152.19it/s]
Casting the dataset: 100%|██████████| 11/11 [00:00<00:00, 10606.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 405.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.4764426946640015, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9047619047619048, 'eval_span_f1': 0.0, 'eval_runtime': 0.0942, 'eval_samples_per_second': 10.611, 'eval_steps_per_second': 10.611, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.82s/it]

{'eval_loss': 1.007729411125183, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0883, 'eval_samples_per_second': 11.319, 'eval_steps_per_second': 11.319, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:11<00:00,  1.92s/it]

{'eval_loss': 0.883684515953064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1333, 'eval_samples_per_second': 7.5, 'eval_steps_per_second': 7.5, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.21s/it]


{'train_runtime': 13.2859, 'train_samples_per_second': 2.484, 'train_steps_per_second': 0.452, 'train_loss': 1.2165114879608154, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 5809.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 478.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 397.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.48580801486969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1334, 'eval_samples_per_second': 7.499, 'eval_steps_per_second': 7.499, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.94s/it]

{'eval_loss': 1.002849817276001, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.03s/it]

{'eval_loss': 0.8812041282653809, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1026, 'eval_samples_per_second': 9.742, 'eval_steps_per_second': 9.742, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.30s/it]


{'train_runtime': 13.7769, 'train_samples_per_second': 2.613, 'train_steps_per_second': 0.436, 'train_loss': 1.1995085875193279, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 11963.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.99 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 397.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.48580801486969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1253, 'eval_samples_per_second': 7.983, 'eval_steps_per_second': 7.983, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.87s/it]

{'eval_loss': 1.002849817276001, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1323, 'eval_samples_per_second': 7.56, 'eval_steps_per_second': 7.56, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  1.96s/it]

{'eval_loss': 0.8812041282653809, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1597, 'eval_samples_per_second': 6.26, 'eval_steps_per_second': 6.26, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.24s/it]


{'train_runtime': 13.4595, 'train_samples_per_second': 2.675, 'train_steps_per_second': 0.446, 'train_loss': 1.1995085875193279, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 12012.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 391.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.48580801486969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.89s/it]

{'eval_loss': 1.002849817276001, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1385, 'eval_samples_per_second': 7.22, 'eval_steps_per_second': 7.22, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.04s/it]

{'eval_loss': 0.8812041282653809, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1318, 'eval_samples_per_second': 7.586, 'eval_steps_per_second': 7.586, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.29s/it]


{'train_runtime': 13.7276, 'train_samples_per_second': 2.622, 'train_steps_per_second': 0.437, 'train_loss': 1.1995085875193279, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 11986.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 384.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.48580801486969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1293, 'eval_samples_per_second': 7.735, 'eval_steps_per_second': 7.735, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.88s/it]

{'eval_loss': 1.002849817276001, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1258, 'eval_samples_per_second': 7.947, 'eval_steps_per_second': 7.947, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.02s/it]

{'eval_loss': 0.8812041282653809, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1283, 'eval_samples_per_second': 7.794, 'eval_steps_per_second': 7.794, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.30s/it]


{'train_runtime': 13.7768, 'train_samples_per_second': 2.613, 'train_steps_per_second': 0.436, 'train_loss': 1.1995085875193279, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.21it/s]
Casting the dataset: 100%|██████████| 12/12 [00:00<00:00, 5999.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 385.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.48580801486969, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.1321, 'eval_samples_per_second': 7.571, 'eval_steps_per_second': 7.571, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.91s/it]

{'eval_loss': 1.002849817276001, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1396, 'eval_samples_per_second': 7.164, 'eval_steps_per_second': 7.164, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.03s/it]

{'eval_loss': 0.8812041282653809, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1263, 'eval_samples_per_second': 7.92, 'eval_steps_per_second': 7.92, 'epoch': 3.0}


100%|██████████| 6/6 [00:13<00:00,  2.30s/it]


{'train_runtime': 13.8092, 'train_samples_per_second': 2.607, 'train_steps_per_second': 0.434, 'train_loss': 1.1995085875193279, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6501.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 967.10 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 359.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 241.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.503404140472412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.833, 'eval_steps_per_second': 12.833, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:07<00:03,  1.97s/it]

{'eval_loss': 0.9790680408477783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.407, 'eval_steps_per_second': 12.407, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.11s/it]

{'eval_loss': 0.8696587085723877, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.002, 'eval_steps_per_second': 12.002, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.36s/it]


{'train_runtime': 14.1796, 'train_samples_per_second': 2.75, 'train_steps_per_second': 0.423, 'train_loss': 1.200480063756307, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 140.43it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6498.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.64 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 331.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.503404140472412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.01s/it]

{'eval_loss': 0.9790680408477783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.749, 'eval_steps_per_second': 11.749, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.16s/it]

{'eval_loss': 0.8696587085723877, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.42s/it]


{'train_runtime': 14.4896, 'train_samples_per_second': 2.692, 'train_steps_per_second': 0.414, 'train_loss': 1.200480063756307, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.10it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 12979.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 308.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.503404140472412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.387, 'eval_steps_per_second': 12.387, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.02s/it]

{'eval_loss': 0.9790680408477783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.029, 'eval_steps_per_second': 13.029, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.10s/it]

{'eval_loss': 0.8696587085723877, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.505, 'eval_steps_per_second': 11.505, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.38s/it]


{'train_runtime': 14.2961, 'train_samples_per_second': 2.728, 'train_steps_per_second': 0.42, 'train_loss': 1.200480063756307, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 165.13it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 6496.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 331.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.503404140472412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0785, 'eval_samples_per_second': 12.734, 'eval_steps_per_second': 12.734, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.00s/it]

{'eval_loss': 0.9790680408477783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.14s/it]

{'eval_loss': 0.8696587085723877, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.512, 'eval_steps_per_second': 11.512, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.41s/it]


{'train_runtime': 14.442, 'train_samples_per_second': 2.7, 'train_steps_per_second': 0.415, 'train_loss': 1.200480063756307, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
Casting the dataset: 100%|██████████| 13/13 [00:00<00:00, 12994.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 315.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.503404140472412, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.04s/it]

{'eval_loss': 0.9790680408477783, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:12<00:00,  2.12s/it]

{'eval_loss': 0.8696587085723877, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.922, 'eval_steps_per_second': 11.922, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.40s/it]


{'train_runtime': 14.4081, 'train_samples_per_second': 2.707, 'train_steps_per_second': 0.416, 'train_loss': 1.200480063756307, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 163.11it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 6998.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 985.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.88 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 316.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5130361318588257, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.07s/it]

{'eval_loss': 0.9426148533821106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.03, 'eval_steps_per_second': 13.03, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.19s/it]

{'eval_loss': 0.8592904210090637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.777, 'eval_steps_per_second': 11.777, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.42s/it]


{'train_runtime': 14.5234, 'train_samples_per_second': 2.892, 'train_steps_per_second': 0.413, 'train_loss': 1.1964253584543865, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 140.69it/s]
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 14044.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 278.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 1.5130361318588257, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.824, 'eval_steps_per_second': 12.824, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.11s/it]

{'eval_loss': 0.9426148533821106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.22s/it]

{'eval_loss': 0.8592904210090637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.076, 'eval_steps_per_second': 12.076, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.45s/it]


{'train_runtime': 14.6912, 'train_samples_per_second': 2.859, 'train_steps_per_second': 0.408, 'train_loss': 1.1964253584543865, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 13291.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 296.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5130361318588257, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.784, 'eval_steps_per_second': 11.784, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.07s/it]

{'eval_loss': 0.9426148533821106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.36, 'eval_steps_per_second': 12.36, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.20s/it]

{'eval_loss': 0.8592904210090637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.43s/it]


{'train_runtime': 14.5809, 'train_samples_per_second': 2.88, 'train_steps_per_second': 0.411, 'train_loss': 1.1964253584543865, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 141.04it/s]
100%|██████████| 1/1 [00:00<00:00, 152.37it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 14024.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 290.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.5130361318588257, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.203, 'eval_steps_per_second': 12.203, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.11s/it]

{'eval_loss': 0.9426148533821106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.876, 'eval_steps_per_second': 11.876, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.22s/it]

{'eval_loss': 0.8592904210090637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.47s/it]


{'train_runtime': 14.7914, 'train_samples_per_second': 2.839, 'train_steps_per_second': 0.406, 'train_loss': 1.1964253584543865, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
Casting the dataset: 100%|██████████| 14/14 [00:00<00:00, 14001.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 284.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.5130361318588257, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0771, 'eval_samples_per_second': 12.971, 'eval_steps_per_second': 12.971, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.13s/it]

{'eval_loss': 0.9426148533821106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.23s/it]

{'eval_loss': 0.8592904210090637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.848, 'eval_steps_per_second': 12.848, 'epoch': 3.0}


100%|██████████| 6/6 [00:14<00:00,  2.49s/it]


{'train_runtime': 14.9133, 'train_samples_per_second': 2.816, 'train_steps_per_second': 0.402, 'train_loss': 1.1964253584543865, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 141.82it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 14976.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 281.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5112390518188477, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.016, 'eval_steps_per_second': 13.016, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.19s/it]

{'eval_loss': 0.9719721078872681, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0784, 'eval_samples_per_second': 12.752, 'eval_steps_per_second': 12.752, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.34s/it]

{'eval_loss': 0.8631712198257446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.02, 'eval_steps_per_second': 13.02, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.56s/it]


{'train_runtime': 15.3295, 'train_samples_per_second': 2.936, 'train_steps_per_second': 0.391, 'train_loss': 1.2040361563364665, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 14473.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 281.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.5112390518188477, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.17s/it]

{'eval_loss': 0.9719721078872681, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.193, 'eval_steps_per_second': 13.193, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.32s/it]

{'eval_loss': 0.8631712198257446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.374, 'eval_steps_per_second': 13.374, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.52s/it]


{'train_runtime': 15.091, 'train_samples_per_second': 2.982, 'train_steps_per_second': 0.398, 'train_loss': 1.2040361563364665, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 14463.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 248.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5112390518188477, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.836, 'eval_steps_per_second': 12.836, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.20s/it]

{'eval_loss': 0.9719721078872681, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.198, 'eval_steps_per_second': 13.198, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.30s/it]

{'eval_loss': 0.8631712198257446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.52s/it]


{'train_runtime': 15.1403, 'train_samples_per_second': 2.972, 'train_steps_per_second': 0.396, 'train_loss': 1.2040361563364665, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 7492.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 246.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5112390518188477, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.181, 'eval_steps_per_second': 13.181, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.19s/it]

{'eval_loss': 0.9719721078872681, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.383, 'eval_steps_per_second': 11.383, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.30s/it]

{'eval_loss': 0.8631712198257446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.36, 'eval_steps_per_second': 13.36, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.53s/it]


{'train_runtime': 15.2022, 'train_samples_per_second': 2.96, 'train_steps_per_second': 0.395, 'train_loss': 1.2040361563364665, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 30.96it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
Casting the dataset: 100%|██████████| 15/15 [00:00<00:00, 14993.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 261.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.5112390518188477, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.318, 'eval_steps_per_second': 12.318, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.18s/it]

{'eval_loss': 0.9719721078872681, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:13<00:00,  2.31s/it]

{'eval_loss': 0.8631712198257446, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.53s/it]


{'train_runtime': 15.1815, 'train_samples_per_second': 2.964, 'train_steps_per_second': 0.395, 'train_loss': 1.2040361563364665, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.82it/s]
100%|██████████| 1/1 [00:00<00:00, 140.58it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 15978.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 230.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5027827024459839, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.636, 'eval_steps_per_second': 11.636, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.27s/it]

{'eval_loss': 0.9892184734344482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.34s/it]

{'eval_loss': 0.8705340027809143, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.202, 'eval_steps_per_second': 13.202, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.58s/it]


{'train_runtime': 15.4667, 'train_samples_per_second': 3.103, 'train_steps_per_second': 0.388, 'train_loss': 1.2008299827575684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 162.12it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 15997.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 482.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 274.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5027827024459839, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.698, 'eval_steps_per_second': 11.698, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:08<00:04,  2.25s/it]

{'eval_loss': 0.9892184734344482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.52, 'eval_steps_per_second': 11.52, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.37s/it]

{'eval_loss': 0.8705340027809143, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.86, 'eval_steps_per_second': 12.86, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.57s/it]


{'train_runtime': 15.3983, 'train_samples_per_second': 3.117, 'train_steps_per_second': 0.39, 'train_loss': 1.2008299827575684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.34it/s]
100%|██████████| 1/1 [00:00<00:00, 123.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 15997.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 248.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.5027827024459839, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.861, 'eval_steps_per_second': 10.861, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.30s/it]

{'eval_loss': 0.9892184734344482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.63, 'eval_steps_per_second': 10.63, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.38s/it]

{'eval_loss': 0.8705340027809143, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.59s/it]


{'train_runtime': 15.5519, 'train_samples_per_second': 3.086, 'train_steps_per_second': 0.386, 'train_loss': 1.2008299827575684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.93it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 7994.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 256.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.5027827024459839, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.28s/it]

{'eval_loss': 0.9892184734344482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.39s/it]

{'eval_loss': 0.8705340027809143, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.293, 'eval_steps_per_second': 12.293, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.60s/it]


{'train_runtime': 15.6237, 'train_samples_per_second': 3.072, 'train_steps_per_second': 0.384, 'train_loss': 1.2008299827575684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
100%|██████████| 1/1 [00:00<00:00, 141.05it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 16/16 [00:00<00:00, 7998.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 214.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.5027827024459839, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8968253968253969, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 4/6 [00:09<00:04,  2.28s/it]

{'eval_loss': 0.9892184734344482, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 6/6 [00:14<00:00,  2.36s/it]

{'eval_loss': 0.8705340027809143, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 3.0}


100%|██████████| 6/6 [00:15<00:00,  2.58s/it]


{'train_runtime': 15.4824, 'train_samples_per_second': 3.1, 'train_steps_per_second': 0.388, 'train_loss': 1.2008299827575684, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8488.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 206.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.0353295803070068, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.122, 'eval_steps_per_second': 11.122, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.57s/it]

{'eval_loss': 0.7300063967704773, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 0.6896291971206665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.623, 'eval_steps_per_second': 11.623, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.94s/it]


{'train_runtime': 17.4445, 'train_samples_per_second': 2.924, 'train_steps_per_second': 0.516, 'train_loss': 0.9918411042955186, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8506.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 231.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.095243215560913, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.17, 'eval_steps_per_second': 13.17, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.61s/it]

{'eval_loss': 0.6685498952865601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.2, 'eval_steps_per_second': 12.2, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.64s/it]

{'eval_loss': 0.6440972089767456, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.205, 'eval_steps_per_second': 12.205, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.97s/it]


{'train_runtime': 17.739, 'train_samples_per_second': 2.875, 'train_steps_per_second': 0.507, 'train_loss': 0.9969716601901584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.07it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8491.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 238.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.095243215560913, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.278, 'eval_steps_per_second': 12.278, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.59s/it]

{'eval_loss': 0.6685498952865601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.918, 'eval_steps_per_second': 11.918, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 0.6440972089767456, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.98s/it]


{'train_runtime': 17.8102, 'train_samples_per_second': 2.864, 'train_steps_per_second': 0.505, 'train_loss': 0.9969716601901584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8494.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.79 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 211.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 243.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 1.095243215560913, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1306, 'eval_samples_per_second': 7.659, 'eval_steps_per_second': 7.659, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.60s/it]

{'eval_loss': 0.6685498952865601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.356, 'eval_steps_per_second': 13.356, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.67s/it]

{'eval_loss': 0.6440972089767456, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.98s/it]


{'train_runtime': 17.8574, 'train_samples_per_second': 2.856, 'train_steps_per_second': 0.504, 'train_loss': 0.9969716601901584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 164.69it/s]
Casting the dataset: 100%|██████████| 17/17 [00:00<00:00, 8499.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.79 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 227.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.095243215560913, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.58s/it]

{'eval_loss': 0.6685498952865601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.65s/it]

{'eval_loss': 0.6440972089767456, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.97s/it]


{'train_runtime': 17.7083, 'train_samples_per_second': 2.88, 'train_steps_per_second': 0.508, 'train_loss': 0.9969716601901584, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 52.12it/s]
100%|██████████| 1/1 [00:00<00:00, 123.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8991.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 243.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 1.0912648439407349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.64s/it]

{'eval_loss': 0.6704400181770325, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.753, 'eval_steps_per_second': 11.753, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.70s/it]

{'eval_loss': 0.6487775444984436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.00s/it]


{'train_runtime': 18.0419, 'train_samples_per_second': 2.993, 'train_steps_per_second': 0.499, 'train_loss': 0.9920443428887261, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 17992.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.22 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 226.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0912648439407349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.66s/it]

{'eval_loss': 0.6704400181770325, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.897, 'eval_steps_per_second': 11.897, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.69s/it]

{'eval_loss': 0.6487775444984436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.01s/it]


{'train_runtime': 18.1214, 'train_samples_per_second': 2.98, 'train_steps_per_second': 0.497, 'train_loss': 0.9920443428887261, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 17984.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 221.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0912648439407349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.903, 'eval_steps_per_second': 11.903, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:05,  1.67s/it]

{'eval_loss': 0.6704400181770325, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.198, 'eval_steps_per_second': 13.198, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.74s/it]

{'eval_loss': 0.6487775444984436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.04s/it]


{'train_runtime': 18.3926, 'train_samples_per_second': 2.936, 'train_steps_per_second': 0.489, 'train_loss': 0.9920443428887261, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 152.90it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 9001.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 961.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.28 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 218.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0912648439407349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0782, 'eval_samples_per_second': 12.78, 'eval_steps_per_second': 12.78, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:04,  1.64s/it]

{'eval_loss': 0.6704400181770325, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.409, 'eval_steps_per_second': 11.409, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.68s/it]

{'eval_loss': 0.6487775444984436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 3.0}


100%|██████████| 9/9 [00:17<00:00,  1.98s/it]


{'train_runtime': 17.7858, 'train_samples_per_second': 3.036, 'train_steps_per_second': 0.506, 'train_loss': 0.9920443428887261, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.92it/s]
100%|██████████| 1/1 [00:00<00:00, 141.11it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
Casting the dataset: 100%|██████████| 18/18 [00:00<00:00, 8774.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 232.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0912648439407349, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0793, 'eval_samples_per_second': 12.603, 'eval_steps_per_second': 12.603, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:05,  1.67s/it]

{'eval_loss': 0.6704400181770325, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.356, 'eval_steps_per_second': 12.356, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:16<00:00,  1.70s/it]

{'eval_loss': 0.6487775444984436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.011, 'eval_steps_per_second': 13.011, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.02s/it]


{'train_runtime': 18.137, 'train_samples_per_second': 2.977, 'train_steps_per_second': 0.496, 'train_loss': 0.9920443428887261, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 18177.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 174.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0894275903701782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.514, 'eval_steps_per_second': 12.514, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.72s/it]

{'eval_loss': 0.6699498295783997, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.763, 'eval_steps_per_second': 11.763, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.78s/it]

{'eval_loss': 0.6430567502975464, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.794, 'eval_steps_per_second': 11.794, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.08s/it]


{'train_runtime': 18.727, 'train_samples_per_second': 3.044, 'train_steps_per_second': 0.481, 'train_loss': 0.9927052391899956, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 9491.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 227.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0894275903701782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.048, 'eval_steps_per_second': 12.048, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:10<00:05,  1.69s/it]

{'eval_loss': 0.6699498295783997, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.86, 'eval_steps_per_second': 10.86, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.76s/it]

{'eval_loss': 0.6430567502975464, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.863, 'eval_steps_per_second': 10.863, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.06s/it]


{'train_runtime': 18.4986, 'train_samples_per_second': 3.081, 'train_steps_per_second': 0.487, 'train_loss': 0.9927052391899956, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.04it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 9506.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 199.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0894275903701782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.507, 'eval_steps_per_second': 11.507, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.74s/it]

{'eval_loss': 0.6699498295783997, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.649, 'eval_steps_per_second': 11.649, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.78s/it]

{'eval_loss': 0.6430567502975464, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.502, 'eval_steps_per_second': 11.502, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.10s/it]


{'train_runtime': 18.8688, 'train_samples_per_second': 3.021, 'train_steps_per_second': 0.477, 'train_loss': 0.9927052391899956, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.41it/s]
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 9501.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 486.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 214.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 248.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0894275903701782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1281, 'eval_samples_per_second': 7.806, 'eval_steps_per_second': 7.806, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.73s/it]

{'eval_loss': 0.6699498295783997, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.508, 'eval_steps_per_second': 11.508, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.81s/it]

{'eval_loss': 0.6430567502975464, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.624, 'eval_steps_per_second': 11.624, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.10s/it]


{'train_runtime': 18.9342, 'train_samples_per_second': 3.01, 'train_steps_per_second': 0.475, 'train_loss': 0.9927052391899956, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
Casting the dataset: 100%|██████████| 19/19 [00:00<00:00, 9294.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 222.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0894275903701782, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.556, 'eval_steps_per_second': 11.556, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.71s/it]

{'eval_loss': 0.6699498295783997, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.679, 'eval_steps_per_second': 10.679, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.79s/it]

{'eval_loss': 0.6430567502975464, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 9/9 [00:18<00:00,  2.07s/it]


{'train_runtime': 18.664, 'train_samples_per_second': 3.054, 'train_steps_per_second': 0.482, 'train_loss': 0.9927052391899956, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 9995.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 216.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.088736891746521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.138, 'eval_samples_per_second': 7.247, 'eval_steps_per_second': 7.247, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.82s/it]

{'eval_loss': 0.6699844598770142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1354, 'eval_samples_per_second': 7.388, 'eval_steps_per_second': 7.388, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.88s/it]

{'eval_loss': 0.6362370252609253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1345, 'eval_samples_per_second': 7.434, 'eval_steps_per_second': 7.434, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.17s/it]


{'train_runtime': 19.4937, 'train_samples_per_second': 3.078, 'train_steps_per_second': 0.462, 'train_loss': 0.9937665727403429, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
100%|██████████| 1/1 [00:00<00:00, 164.69it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20030.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 945.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 196.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.088736891746521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1609, 'eval_samples_per_second': 6.216, 'eval_steps_per_second': 6.216, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.79s/it]

{'eval_loss': 0.6699844598770142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1143, 'eval_samples_per_second': 8.751, 'eval_steps_per_second': 8.751, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.85s/it]

{'eval_loss': 0.6362370252609253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0971, 'eval_samples_per_second': 10.298, 'eval_steps_per_second': 10.298, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.14s/it]


{'train_runtime': 19.2977, 'train_samples_per_second': 3.109, 'train_steps_per_second': 0.466, 'train_loss': 0.9937665727403429, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 164.55it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20034.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 226.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.088736891746521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1425, 'eval_samples_per_second': 7.016, 'eval_steps_per_second': 7.016, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.82s/it]

{'eval_loss': 0.6699844598770142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1293, 'eval_samples_per_second': 7.733, 'eval_steps_per_second': 7.733, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.90s/it]

{'eval_loss': 0.6362370252609253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1232, 'eval_samples_per_second': 8.117, 'eval_steps_per_second': 8.117, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.18s/it]


{'train_runtime': 19.6501, 'train_samples_per_second': 3.053, 'train_steps_per_second': 0.458, 'train_loss': 0.9937665727403429, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20020.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 211.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.088736891746521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1184, 'eval_samples_per_second': 8.447, 'eval_steps_per_second': 8.447, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.80s/it]

{'eval_loss': 0.6699844598770142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.13, 'eval_samples_per_second': 7.694, 'eval_steps_per_second': 7.694, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.88s/it]

{'eval_loss': 0.6362370252609253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1137, 'eval_samples_per_second': 8.797, 'eval_steps_per_second': 8.797, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.14s/it]


{'train_runtime': 19.2967, 'train_samples_per_second': 3.109, 'train_steps_per_second': 0.466, 'train_loss': 0.9937665727403429, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
100%|██████████| 1/1 [00:00<00:00, 142.41it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 20/20 [00:00<00:00, 20001.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 202.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.088736891746521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1268, 'eval_samples_per_second': 7.887, 'eval_steps_per_second': 7.887, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.79s/it]

{'eval_loss': 0.6699844598770142, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1435, 'eval_samples_per_second': 6.969, 'eval_steps_per_second': 6.969, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:17<00:00,  1.85s/it]

{'eval_loss': 0.6362370252609253, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.13s/it]


{'train_runtime': 19.1595, 'train_samples_per_second': 3.132, 'train_steps_per_second': 0.47, 'train_loss': 0.9937665727403429, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.52it/s]
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10498.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 189.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0872477293014526, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.754, 'eval_steps_per_second': 10.754, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.91s/it]

{'eval_loss': 0.6692073941230774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.711, 'eval_steps_per_second': 11.711, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.97s/it]

{'eval_loss': 0.6336166858673096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.644, 'eval_steps_per_second': 11.644, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.23s/it]


{'train_runtime': 20.0353, 'train_samples_per_second': 3.144, 'train_steps_per_second': 0.449, 'train_loss': 0.9984823862711588, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10329.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 198.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0872477293014526, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.912, 'eval_steps_per_second': 11.912, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.91s/it]

{'eval_loss': 0.6692073941230774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.99s/it]

{'eval_loss': 0.6336166858673096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.23s/it]


{'train_runtime': 20.0979, 'train_samples_per_second': 3.135, 'train_steps_per_second': 0.448, 'train_loss': 0.9984823862711588, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20082.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 195.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0872477293014526, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.186, 'eval_steps_per_second': 12.186, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.86s/it]

{'eval_loss': 0.6692073941230774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.93s/it]

{'eval_loss': 0.6336166858673096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.18s/it]


{'train_runtime': 19.6309, 'train_samples_per_second': 3.209, 'train_steps_per_second': 0.458, 'train_loss': 0.9984823862711588, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 165.02it/s]
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 10490.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 201.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0872477293014526, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.531, 'eval_steps_per_second': 12.531, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.89s/it]

{'eval_loss': 0.6692073941230774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.97s/it]

{'eval_loss': 0.6336166858673096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.934, 'eval_steps_per_second': 11.934, 'epoch': 3.0}


100%|██████████| 9/9 [00:19<00:00,  2.21s/it]


{'train_runtime': 19.8896, 'train_samples_per_second': 3.167, 'train_steps_per_second': 0.452, 'train_loss': 0.9984823862711588, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 98.52it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
Casting the dataset: 100%|██████████| 21/21 [00:00<00:00, 20996.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 208.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0872477293014526, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.501, 'eval_steps_per_second': 11.501, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:11<00:05,  1.92s/it]

{'eval_loss': 0.6692073941230774, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.777, 'eval_steps_per_second': 11.777, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  1.97s/it]

{'eval_loss': 0.6336166858673096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.375, 'eval_steps_per_second': 12.375, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.23s/it]


{'train_runtime': 20.0956, 'train_samples_per_second': 3.135, 'train_steps_per_second': 0.448, 'train_loss': 0.9984823862711588, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.22it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 22022.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 188.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 318.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0870440006256104, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.374, 'eval_steps_per_second': 13.374, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.96s/it]

{'eval_loss': 0.669303297996521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.04s/it]

{'eval_loss': 0.6338397860527039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2562, 'train_samples_per_second': 3.258, 'train_steps_per_second': 0.444, 'train_loss': 0.9977606667412652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.40it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 162.78it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 10991.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 208.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0870440006256104, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  1.98s/it]

{'eval_loss': 0.669303297996521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.00s/it]

{'eval_loss': 0.6338397860527039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2283, 'train_samples_per_second': 3.263, 'train_steps_per_second': 0.445, 'train_loss': 0.9977606667412652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 140.55it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 10782.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 190.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0870440006256104, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.006, 'eval_steps_per_second': 13.006, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.00s/it]

{'eval_loss': 0.669303297996521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.364, 'eval_steps_per_second': 13.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.06s/it]

{'eval_loss': 0.6338397860527039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.773, 'eval_steps_per_second': 11.773, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.28s/it]


{'train_runtime': 20.5492, 'train_samples_per_second': 3.212, 'train_steps_per_second': 0.438, 'train_loss': 0.9977606667412652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 7328.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 197.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0870440006256104, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.794, 'eval_steps_per_second': 11.794, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  2.00s/it]

{'eval_loss': 0.669303297996521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:18<00:00,  2.00s/it]

{'eval_loss': 0.6338397860527039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.25s/it]


{'train_runtime': 20.2834, 'train_samples_per_second': 3.254, 'train_steps_per_second': 0.444, 'train_loss': 0.9977606667412652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.15it/s]
100%|██████████| 1/1 [00:00<00:00, 140.34it/s]
100%|██████████| 1/1 [00:00<00:00, 141.87it/s]
Casting the dataset: 100%|██████████| 22/22 [00:00<00:00, 11012.61 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 918.39 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 189.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0870440006256104, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0739, 'eval_samples_per_second': 13.539, 'eval_steps_per_second': 13.539, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:05,  2.00s/it]

{'eval_loss': 0.669303297996521, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.234, 'eval_steps_per_second': 12.234, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.03s/it]

{'eval_loss': 0.6338397860527039, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.23, 'eval_steps_per_second': 12.23, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.27s/it]


{'train_runtime': 20.4427, 'train_samples_per_second': 3.229, 'train_steps_per_second': 0.44, 'train_loss': 0.9977606667412652, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
100%|██████████| 1/1 [00:00<00:00, 140.58it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11496.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 195.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0867429971694946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.325, 'eval_steps_per_second': 12.325, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.05s/it]

{'eval_loss': 0.6696370244026184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.12s/it]

{'eval_loss': 0.6350336670875549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.359, 'eval_steps_per_second': 12.359, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.32s/it]


{'train_runtime': 20.8758, 'train_samples_per_second': 3.305, 'train_steps_per_second': 0.431, 'train_loss': 0.9978307088216146, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 164.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.74it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11277.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 194.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 1.0867429971694946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.07s/it]

{'eval_loss': 0.6696370244026184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 11.002, 'eval_steps_per_second': 11.002, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.13s/it]

{'eval_loss': 0.6350336670875549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 13.195, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.35s/it]


{'train_runtime': 21.1092, 'train_samples_per_second': 3.269, 'train_steps_per_second': 0.426, 'train_loss': 0.9978307088216146, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11484.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 184.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0867429971694946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.05s/it]

{'eval_loss': 0.6696370244026184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.373, 'eval_steps_per_second': 13.373, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.11s/it]

{'eval_loss': 0.6350336670875549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.31s/it]


{'train_runtime': 20.8209, 'train_samples_per_second': 3.314, 'train_steps_per_second': 0.432, 'train_loss': 0.9978307088216146, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 11495.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 189.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0867429971694946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.519, 'eval_steps_per_second': 12.519, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.05s/it]

{'eval_loss': 0.6696370244026184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0754, 'eval_samples_per_second': 13.265, 'eval_steps_per_second': 13.265, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.11s/it]

{'eval_loss': 0.6350336670875549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.431, 'eval_steps_per_second': 12.431, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.31s/it]


{'train_runtime': 20.8137, 'train_samples_per_second': 3.315, 'train_steps_per_second': 0.432, 'train_loss': 0.9978307088216146, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 131.20it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 23/23 [00:00<00:00, 22985.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 184.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0867429971694946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.07s/it]

{'eval_loss': 0.6696370244026184, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.692, 'eval_steps_per_second': 12.692, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.12s/it]

{'eval_loss': 0.6350336670875549, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0777, 'eval_samples_per_second': 12.863, 'eval_steps_per_second': 12.863, 'epoch': 3.0}


100%|██████████| 9/9 [00:20<00:00,  2.32s/it]


{'train_runtime': 20.9074, 'train_samples_per_second': 3.3, 'train_steps_per_second': 0.43, 'train_loss': 0.9978307088216146, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.00it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 11990.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 195.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 318.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0861518383026123, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.512, 'eval_steps_per_second': 11.512, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.16s/it]

{'eval_loss': 0.6690165996551514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.055, 'eval_steps_per_second': 12.055, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.22s/it]

{'eval_loss': 0.6346868872642517, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.40s/it]


{'train_runtime': 21.6069, 'train_samples_per_second': 3.332, 'train_steps_per_second': 0.417, 'train_loss': 0.997843000623915, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.62it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 11992.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 173.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0861518383026123, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.855, 'eval_steps_per_second': 12.855, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.16s/it]

{'eval_loss': 0.6690165996551514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.18s/it]

{'eval_loss': 0.6346868872642517, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.199, 'eval_steps_per_second': 12.199, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.38s/it]


{'train_runtime': 21.4479, 'train_samples_per_second': 3.357, 'train_steps_per_second': 0.42, 'train_loss': 0.997843000623915, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 11718.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 178.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0861518383026123, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.18s/it]

{'eval_loss': 0.6690165996551514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.038, 'eval_steps_per_second': 12.038, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.22s/it]

{'eval_loss': 0.6346868872642517, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.40s/it]


{'train_runtime': 21.638, 'train_samples_per_second': 3.327, 'train_steps_per_second': 0.416, 'train_loss': 0.997843000623915, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 139.96it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 12006.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.42 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 178.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 1.0861518383026123, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.023, 'eval_steps_per_second': 13.023, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:12<00:06,  2.12s/it]

{'eval_loss': 0.6690165996551514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:19<00:00,  2.20s/it]

{'eval_loss': 0.6346868872642517, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.079, 'eval_steps_per_second': 12.079, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.37s/it]


{'train_runtime': 21.3159, 'train_samples_per_second': 3.378, 'train_steps_per_second': 0.422, 'train_loss': 0.997843000623915, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 109.63it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 166.74it/s]
Casting the dataset: 100%|██████████| 24/24 [00:00<00:00, 11737.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 183.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 1.0861518383026123, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.998, 'eval_steps_per_second': 10.998, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 6/9 [00:13<00:06,  2.18s/it]

{'eval_loss': 0.6690165996551514, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.531, 'eval_steps_per_second': 12.531, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 9/9 [00:20<00:00,  2.21s/it]

{'eval_loss': 0.6346868872642517, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.125, 'eval_steps_per_second': 11.125, 'epoch': 3.0}


100%|██████████| 9/9 [00:21<00:00,  2.41s/it]


{'train_runtime': 21.6853, 'train_samples_per_second': 3.32, 'train_steps_per_second': 0.415, 'train_loss': 0.997843000623915, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 30.93it/s]
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12500.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 180.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.7882372140884399, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.59s/it]

{'eval_loss': 0.6136218309402466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:21<00:00,  1.59s/it]

{'eval_loss': 0.6118648052215576, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.024, 'eval_steps_per_second': 13.024, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.94s/it]


{'train_runtime': 23.239, 'train_samples_per_second': 3.227, 'train_steps_per_second': 0.516, 'train_loss': 0.8484908739725748, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 24972.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 202.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8088063597679138, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.851, 'eval_steps_per_second': 11.851, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.62s/it]

{'eval_loss': 0.5500465035438538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.57, 'eval_steps_per_second': 11.57, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.62s/it]

{'eval_loss': 0.5352582335472107, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.516, 'eval_steps_per_second': 11.516, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  2.00s/it]


{'train_runtime': 23.9561, 'train_samples_per_second': 3.131, 'train_steps_per_second': 0.501, 'train_loss': 0.9475723107655843, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12502.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 176.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8088063597679138, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.773, 'eval_steps_per_second': 11.773, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.60s/it]

{'eval_loss': 0.5500465035438538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1319, 'eval_samples_per_second': 7.581, 'eval_steps_per_second': 7.581, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.62s/it]

{'eval_loss': 0.5352582335472107, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.97s/it]


{'train_runtime': 23.692, 'train_samples_per_second': 3.166, 'train_steps_per_second': 0.507, 'train_loss': 0.9475723107655843, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12496.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 170.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8088063597679138, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.879, 'eval_steps_per_second': 11.879, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.62s/it]

{'eval_loss': 0.5500465035438538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.914, 'eval_steps_per_second': 11.914, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.65s/it]

{'eval_loss': 0.5352582335472107, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.99s/it]


{'train_runtime': 23.8433, 'train_samples_per_second': 3.146, 'train_steps_per_second': 0.503, 'train_loss': 0.9475723107655843, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.14it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 25/25 [00:00<00:00, 12261.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.76 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 182.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8088063597679138, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.60s/it]

{'eval_loss': 0.5500465035438538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0784, 'eval_samples_per_second': 12.763, 'eval_steps_per_second': 12.763, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.63s/it]

{'eval_loss': 0.5352582335472107, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.97s/it]


{'train_runtime': 23.653, 'train_samples_per_second': 3.171, 'train_steps_per_second': 0.507, 'train_loss': 0.9475723107655843, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12763.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 188.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8645617961883545, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.68s/it]

{'eval_loss': 0.5642090439796448, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.995, 'eval_steps_per_second': 11.995, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.71s/it]

{'eval_loss': 0.5446426868438721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.03s/it]


{'train_runtime': 24.3172, 'train_samples_per_second': 3.208, 'train_steps_per_second': 0.493, 'train_loss': 0.9663189252217611, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12990.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 162.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8645617961883545, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.66s/it]

{'eval_loss': 0.5642090439796448, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.521, 'eval_steps_per_second': 12.521, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.68s/it]

{'eval_loss': 0.5446426868438721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.01s/it]


{'train_runtime': 24.1032, 'train_samples_per_second': 3.236, 'train_steps_per_second': 0.498, 'train_loss': 0.9663189252217611, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 164.34it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12732.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 174.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8645617961883545, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.68s/it]

{'eval_loss': 0.5642090439796448, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.377, 'eval_steps_per_second': 11.377, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.69s/it]

{'eval_loss': 0.5446426868438721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.01s/it]


{'train_runtime': 24.1243, 'train_samples_per_second': 3.233, 'train_steps_per_second': 0.497, 'train_loss': 0.9663189252217611, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12970.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.19 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 180.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8645617961883545, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.347, 'eval_steps_per_second': 12.347, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.63s/it]

{'eval_loss': 0.5642090439796448, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.012, 'eval_steps_per_second': 13.012, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.68s/it]

{'eval_loss': 0.5446426868438721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 12/12 [00:23<00:00,  1.99s/it]


{'train_runtime': 23.8488, 'train_samples_per_second': 3.271, 'train_steps_per_second': 0.503, 'train_loss': 0.9663189252217611, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 26/26 [00:00<00:00, 12993.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 164.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8645617961883545, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.022, 'eval_steps_per_second': 13.022, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.69s/it]

{'eval_loss': 0.5642090439796448, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.19, 'eval_steps_per_second': 13.19, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.71s/it]

{'eval_loss': 0.5446426868438721, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.03s/it]


{'train_runtime': 24.3321, 'train_samples_per_second': 3.206, 'train_steps_per_second': 0.493, 'train_loss': 0.9663189252217611, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 140.76it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13509.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 142.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8611407279968262, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.054, 'eval_steps_per_second': 12.054, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.70s/it]

{'eval_loss': 0.5627971887588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1037, 'eval_samples_per_second': 9.64, 'eval_steps_per_second': 9.64, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.71s/it]

{'eval_loss': 0.5436302423477173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1313, 'eval_samples_per_second': 7.615, 'eval_steps_per_second': 7.615, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.01s/it]


{'train_runtime': 24.1715, 'train_samples_per_second': 3.351, 'train_steps_per_second': 0.496, 'train_loss': 0.9632206757863363, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13496.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 165.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8611407279968262, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.69s/it]

{'eval_loss': 0.5627971887588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.77s/it]

{'eval_loss': 0.5436302423477173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.908, 'eval_steps_per_second': 11.908, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.04s/it]


{'train_runtime': 24.4659, 'train_samples_per_second': 3.311, 'train_steps_per_second': 0.49, 'train_loss': 0.9632206757863363, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 124.03it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13265.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 152.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8611407279968262, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.118, 'eval_steps_per_second': 11.118, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.72s/it]

{'eval_loss': 0.5627971887588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.227, 'eval_steps_per_second': 11.227, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.74s/it]

{'eval_loss': 0.5436302423477173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.05s/it]


{'train_runtime': 24.616, 'train_samples_per_second': 3.291, 'train_steps_per_second': 0.487, 'train_loss': 0.9632206757863363, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13500.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 168.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8611407279968262, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:14<00:06,  1.69s/it]

{'eval_loss': 0.5627971887588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:22<00:00,  1.73s/it]

{'eval_loss': 0.5436302423477173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1381, 'eval_samples_per_second': 7.239, 'eval_steps_per_second': 7.239, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.02s/it]


{'train_runtime': 24.2847, 'train_samples_per_second': 3.335, 'train_steps_per_second': 0.494, 'train_loss': 0.9632206757863363, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 27/27 [00:00<00:00, 13500.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 177.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8611407279968262, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.517, 'eval_steps_per_second': 12.517, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:06,  1.72s/it]

{'eval_loss': 0.5627971887588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 11.648, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.74s/it]

{'eval_loss': 0.5436302423477173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.617, 'eval_steps_per_second': 11.617, 'epoch': 3.0}


100%|██████████| 12/12 [00:24<00:00,  2.05s/it]


{'train_runtime': 24.5663, 'train_samples_per_second': 3.297, 'train_steps_per_second': 0.488, 'train_loss': 0.9632206757863363, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 14006.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 167.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8581441044807434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1294, 'eval_samples_per_second': 7.727, 'eval_steps_per_second': 7.727, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.81s/it]

{'eval_loss': 0.562042772769928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1312, 'eval_samples_per_second': 7.624, 'eval_steps_per_second': 7.624, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.85s/it]

{'eval_loss': 0.5436981916427612, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1303, 'eval_samples_per_second': 7.673, 'eval_steps_per_second': 7.673, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.11s/it]


{'train_runtime': 25.3288, 'train_samples_per_second': 3.316, 'train_steps_per_second': 0.474, 'train_loss': 0.9606109460194906, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13996.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 171.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8581441044807434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1256, 'eval_samples_per_second': 7.964, 'eval_steps_per_second': 7.964, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.79s/it]

{'eval_loss': 0.562042772769928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.101, 'eval_samples_per_second': 9.896, 'eval_steps_per_second': 9.896, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.82s/it]

{'eval_loss': 0.5436981916427612, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1718, 'eval_samples_per_second': 5.822, 'eval_steps_per_second': 5.822, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.10s/it]


{'train_runtime': 25.1627, 'train_samples_per_second': 3.338, 'train_steps_per_second': 0.477, 'train_loss': 0.9606109460194906, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13991.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 176.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8581441044807434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1243, 'eval_samples_per_second': 8.046, 'eval_steps_per_second': 8.046, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.82s/it]

{'eval_loss': 0.562042772769928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1413, 'eval_samples_per_second': 7.075, 'eval_steps_per_second': 7.075, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.86s/it]

{'eval_loss': 0.5436981916427612, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.519, 'eval_steps_per_second': 11.519, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.12s/it]


{'train_runtime': 25.4096, 'train_samples_per_second': 3.306, 'train_steps_per_second': 0.472, 'train_loss': 0.9606109460194906, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.99it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13999.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.96 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 172.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8581441044807434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1325, 'eval_samples_per_second': 7.55, 'eval_steps_per_second': 7.55, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.82s/it]

{'eval_loss': 0.562042772769928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1392, 'eval_samples_per_second': 7.184, 'eval_steps_per_second': 7.184, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.81s/it]

{'eval_loss': 0.5436981916427612, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1253, 'eval_samples_per_second': 7.983, 'eval_steps_per_second': 7.983, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.11s/it]


{'train_runtime': 25.3147, 'train_samples_per_second': 3.318, 'train_steps_per_second': 0.474, 'train_loss': 0.9606109460194906, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.97it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
Casting the dataset: 100%|██████████| 28/28 [00:00<00:00, 13986.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 170.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8581441044807434, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1264, 'eval_samples_per_second': 7.914, 'eval_steps_per_second': 7.914, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.77s/it]

{'eval_loss': 0.562042772769928, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1393, 'eval_samples_per_second': 7.181, 'eval_steps_per_second': 7.181, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.85s/it]

{'eval_loss': 0.5436981916427612, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.141, 'eval_samples_per_second': 7.094, 'eval_steps_per_second': 7.094, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.11s/it]


{'train_runtime': 25.2817, 'train_samples_per_second': 3.323, 'train_steps_per_second': 0.475, 'train_loss': 0.9606109460194906, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.16it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14483.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.65 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 158.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8553147912025452, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0943, 'eval_samples_per_second': 10.602, 'eval_steps_per_second': 10.602, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.85s/it]

{'eval_loss': 0.5613065361976624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.85s/it]

{'eval_loss': 0.5437659025192261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.11s/it]


{'train_runtime': 25.3335, 'train_samples_per_second': 3.434, 'train_steps_per_second': 0.474, 'train_loss': 0.9583215713500977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14490.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 484.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 940.22 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 158.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8553147912025452, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0734, 'eval_samples_per_second': 13.633, 'eval_steps_per_second': 13.633, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.86s/it]

{'eval_loss': 0.5613065361976624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.93s/it]

{'eval_loss': 0.5437659025192261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.16s/it]


{'train_runtime': 25.8685, 'train_samples_per_second': 3.363, 'train_steps_per_second': 0.464, 'train_loss': 0.9583215713500977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 28981.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 966.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 164.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8553147912025452, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.82s/it]

{'eval_loss': 0.5613065361976624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:23<00:00,  1.88s/it]

{'eval_loss': 0.5437659025192261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.854, 'eval_steps_per_second': 12.854, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.10s/it]


{'train_runtime': 25.257, 'train_samples_per_second': 3.445, 'train_steps_per_second': 0.475, 'train_loss': 0.9583215713500977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14504.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.17 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 158.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8553147912025452, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.008, 'eval_steps_per_second': 13.008, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.84s/it]

{'eval_loss': 0.5613065361976624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.186, 'eval_steps_per_second': 13.186, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.87s/it]

{'eval_loss': 0.5437659025192261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.09, 'eval_steps_per_second': 13.09, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.13s/it]


{'train_runtime': 25.54, 'train_samples_per_second': 3.406, 'train_steps_per_second': 0.47, 'train_loss': 0.9583215713500977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 164.69it/s]
Casting the dataset: 100%|██████████| 29/29 [00:00<00:00, 14184.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 552.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 159.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8553147912025452, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:15<00:07,  1.87s/it]

{'eval_loss': 0.5613065361976624, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.88s/it]

{'eval_loss': 0.5437659025192261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 3.0}


100%|██████████| 12/12 [00:25<00:00,  2.13s/it]


{'train_runtime': 25.5997, 'train_samples_per_second': 3.398, 'train_steps_per_second': 0.469, 'train_loss': 0.9583215713500977, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14983.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 154.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8536486029624939, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.637, 'eval_steps_per_second': 11.637, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.97s/it]

{'eval_loss': 0.5601426959037781, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.03, 'eval_steps_per_second': 13.03, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.98s/it]

{'eval_loss': 0.5430213809013367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.083, 'eval_steps_per_second': 12.083, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.20s/it]


{'train_runtime': 26.442, 'train_samples_per_second': 3.404, 'train_steps_per_second': 0.454, 'train_loss': 0.9608376026153564, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 153.67it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14999.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 158.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8536486029624939, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.91s/it]

{'eval_loss': 0.5601426959037781, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.95s/it]

{'eval_loss': 0.5430213809013367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.512, 'eval_steps_per_second': 11.512, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.18s/it]


{'train_runtime': 26.1669, 'train_samples_per_second': 3.439, 'train_steps_per_second': 0.459, 'train_loss': 0.9608376026153564, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 155.44it/s]
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
100%|██████████| 1/1 [00:00<00:00, 164.52it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14999.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 161.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8536486029624939, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.38, 'eval_steps_per_second': 11.38, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.96s/it]

{'eval_loss': 0.5601426959037781, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.524, 'eval_steps_per_second': 12.524, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  1.98s/it]

{'eval_loss': 0.5430213809013367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.376, 'eval_steps_per_second': 13.376, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.20s/it]


{'train_runtime': 26.4524, 'train_samples_per_second': 3.402, 'train_steps_per_second': 0.454, 'train_loss': 0.9608376026153564, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 14680.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 168.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8536486029624939, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.764, 'eval_steps_per_second': 11.764, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.93s/it]

{'eval_loss': 0.5601426959037781, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.637, 'eval_steps_per_second': 11.637, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.96s/it]

{'eval_loss': 0.5430213809013367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.792, 'eval_steps_per_second': 11.792, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.18s/it]


{'train_runtime': 26.2024, 'train_samples_per_second': 3.435, 'train_steps_per_second': 0.458, 'train_loss': 0.9608376026153564, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
Casting the dataset: 100%|██████████| 30/30 [00:00<00:00, 15010.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 158.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8536486029624939, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.378, 'eval_steps_per_second': 12.378, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.96s/it]

{'eval_loss': 0.5601426959037781, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.97s/it]

{'eval_loss': 0.5430213809013367, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.369, 'eval_steps_per_second': 13.369, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.20s/it]


{'train_runtime': 26.3545, 'train_samples_per_second': 3.415, 'train_steps_per_second': 0.455, 'train_loss': 0.9608376026153564, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.62it/s]
100%|██████████| 1/1 [00:00<00:00, 162.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15499.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 163.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.851457417011261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  2.00s/it]

{'eval_loss': 0.5590280294418335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.201, 'eval_steps_per_second': 13.201, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.00s/it]

{'eval_loss': 0.542288064956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.303, 'eval_steps_per_second': 12.303, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.21s/it]


{'train_runtime': 26.5057, 'train_samples_per_second': 3.509, 'train_steps_per_second': 0.453, 'train_loss': 0.9595862229665121, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15495.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 484.44 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 153.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.851457417011261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1141, 'eval_samples_per_second': 8.766, 'eval_steps_per_second': 8.766, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.97s/it]

{'eval_loss': 0.5590280294418335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.647, 'eval_steps_per_second': 11.647, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:24<00:00,  1.98s/it]

{'eval_loss': 0.542288064956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.277, 'eval_steps_per_second': 13.277, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.19s/it]


{'train_runtime': 26.3191, 'train_samples_per_second': 3.534, 'train_steps_per_second': 0.456, 'train_loss': 0.9595862229665121, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.56it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15501.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 167.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.851457417011261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.642, 'eval_steps_per_second': 11.642, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.98s/it]

{'eval_loss': 0.5590280294418335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.01s/it]

{'eval_loss': 0.542288064956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.022, 'eval_steps_per_second': 13.022, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.21s/it]


{'train_runtime': 26.5075, 'train_samples_per_second': 3.508, 'train_steps_per_second': 0.453, 'train_loss': 0.9595862229665121, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.51it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15493.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 166.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.851457417011261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 13.195, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.97s/it]

{'eval_loss': 0.5590280294418335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.00s/it]

{'eval_loss': 0.542288064956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.13, 'eval_steps_per_second': 12.13, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.21s/it]


{'train_runtime': 26.4698, 'train_samples_per_second': 3.513, 'train_steps_per_second': 0.453, 'train_loss': 0.9595862229665121, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 31/31 [00:00<00:00, 15501.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 159.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.851457417011261, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.132, 'eval_steps_per_second': 12.132, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:07,  1.99s/it]

{'eval_loss': 0.5590280294418335, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.49, 'eval_steps_per_second': 11.49, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.02s/it]

{'eval_loss': 0.542288064956665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 3.0}


100%|██████████| 12/12 [00:26<00:00,  2.22s/it]


{'train_runtime': 26.6693, 'train_samples_per_second': 3.487, 'train_steps_per_second': 0.45, 'train_loss': 0.9595862229665121, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 164.12it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15982.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 157.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8502523303031921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.638, 'eval_steps_per_second': 11.638, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.07s/it]

{'eval_loss': 0.5579514503479004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.011, 'eval_steps_per_second': 13.011, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.05s/it]

{'eval_loss': 0.541242241859436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.26s/it]


{'train_runtime': 27.0883, 'train_samples_per_second': 3.544, 'train_steps_per_second': 0.443, 'train_loss': 0.9586817423502604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 32063.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.22 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 157.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8502523303031921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.04s/it]

{'eval_loss': 0.5579514503479004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0777, 'eval_samples_per_second': 12.863, 'eval_steps_per_second': 12.863, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.06s/it]

{'eval_loss': 0.541242241859436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.081, 'eval_steps_per_second': 12.081, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.25s/it]


{'train_runtime': 27.0399, 'train_samples_per_second': 3.55, 'train_steps_per_second': 0.444, 'train_loss': 0.9586817423502604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 152.60it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15995.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.79 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 159.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8502523303031921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.052, 'eval_steps_per_second': 12.052, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.05s/it]

{'eval_loss': 0.5579514503479004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.07s/it]

{'eval_loss': 0.541242241859436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.25s/it]


{'train_runtime': 27.0529, 'train_samples_per_second': 3.549, 'train_steps_per_second': 0.444, 'train_loss': 0.9586817423502604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15721.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 149.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8502523303031921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0873, 'eval_samples_per_second': 11.459, 'eval_steps_per_second': 11.459, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.03s/it]

{'eval_loss': 0.5579514503479004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.062, 'eval_steps_per_second': 12.062, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.07s/it]

{'eval_loss': 0.541242241859436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.635, 'eval_steps_per_second': 11.635, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.25s/it]


{'train_runtime': 27.007, 'train_samples_per_second': 3.555, 'train_steps_per_second': 0.444, 'train_loss': 0.9586817423502604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 32/32 [00:00<00:00, 15997.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 153.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8502523303031921, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.551, 'eval_steps_per_second': 11.551, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 8/12 [00:16<00:08,  2.05s/it]

{'eval_loss': 0.5579514503479004, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.024, 'eval_steps_per_second': 13.024, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 12/12 [00:25<00:00,  2.09s/it]

{'eval_loss': 0.541242241859436, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.628, 'eval_steps_per_second': 10.628, 'epoch': 3.0}


100%|██████████| 12/12 [00:27<00:00,  2.26s/it]


{'train_runtime': 27.1326, 'train_samples_per_second': 3.538, 'train_steps_per_second': 0.442, 'train_loss': 0.9586817423502604, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 141.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16513.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 147.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6667808294296265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.766, 'eval_steps_per_second': 11.766, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.54s/it]

{'eval_loss': 0.5173430442810059, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.716, 'eval_steps_per_second': 11.716, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.58s/it]

{'eval_loss': 0.5017462968826294, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.89s/it]


{'train_runtime': 28.3078, 'train_samples_per_second': 3.497, 'train_steps_per_second': 0.53, 'train_loss': 0.8256768544514974, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.19it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 11008.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 148.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8511841893196106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.278, 'eval_steps_per_second': 12.278, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:07,  1.57s/it]

{'eval_loss': 0.6723594069480896, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.838, 'eval_steps_per_second': 12.838, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.59s/it]

{'eval_loss': 0.670390248298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.95s/it]


{'train_runtime': 29.3009, 'train_samples_per_second': 3.379, 'train_steps_per_second': 0.512, 'train_loss': 0.8904677708943685, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.29it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16499.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 153.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8511841893196106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.723, 'eval_steps_per_second': 11.723, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.57s/it]

{'eval_loss': 0.6723594069480896, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.202, 'eval_steps_per_second': 12.202, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.59s/it]

{'eval_loss': 0.670390248298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


{'train_runtime': 29.0322, 'train_samples_per_second': 3.41, 'train_steps_per_second': 0.517, 'train_loss': 0.8904677708943685, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16139.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 151.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8511841893196106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.85, 'eval_steps_per_second': 11.85, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:17<00:07,  1.55s/it]

{'eval_loss': 0.6723594069480896, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0883, 'eval_samples_per_second': 11.324, 'eval_steps_per_second': 11.324, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.58s/it]

{'eval_loss': 0.670390248298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 3.0}


100%|██████████| 15/15 [00:28<00:00,  1.93s/it]


{'train_runtime': 28.9308, 'train_samples_per_second': 3.422, 'train_steps_per_second': 0.518, 'train_loss': 0.8904677708943685, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 165.29it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 33/33 [00:00<00:00, 16479.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 148.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8511841893196106, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.672, 'eval_steps_per_second': 12.672, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:07,  1.56s/it]

{'eval_loss': 0.6723594069480896, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.023, 'eval_steps_per_second': 12.023, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:27<00:00,  1.59s/it]

{'eval_loss': 0.670390248298645, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


{'train_runtime': 29.1174, 'train_samples_per_second': 3.4, 'train_steps_per_second': 0.515, 'train_loss': 0.8904677708943685, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16987.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 151.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8463776707649231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.375, 'eval_steps_per_second': 13.375, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.6846566796302795, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.709, 'eval_steps_per_second': 11.709, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.64s/it]

{'eval_loss': 0.6682614088058472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.028, 'eval_steps_per_second': 13.028, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.97s/it]


{'train_runtime': 29.5292, 'train_samples_per_second': 3.454, 'train_steps_per_second': 0.508, 'train_loss': 0.8787270863850911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 17009.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.22 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 146.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8463776707649231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.6846566796302795, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.504, 'eval_steps_per_second': 11.504, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.63s/it]

{'eval_loss': 0.6682614088058472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.96s/it]


{'train_runtime': 29.41, 'train_samples_per_second': 3.468, 'train_steps_per_second': 0.51, 'train_loss': 0.8787270863850911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16597.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 897.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 139.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8463776707649231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.64s/it]

{'eval_loss': 0.6846566796302795, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.248, 'eval_steps_per_second': 11.248, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.64s/it]

{'eval_loss': 0.6682614088058472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.97s/it]


{'train_runtime': 29.5054, 'train_samples_per_second': 3.457, 'train_steps_per_second': 0.508, 'train_loss': 0.8787270863850911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.85it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 16978.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 153.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8463776707649231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.6846566796302795, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.65s/it]

{'eval_loss': 0.6682614088058472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.383, 'eval_steps_per_second': 12.383, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.96s/it]


{'train_runtime': 29.4413, 'train_samples_per_second': 3.465, 'train_steps_per_second': 0.509, 'train_loss': 0.8787270863850911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 34/34 [00:00<00:00, 11153.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 152.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8463776707649231, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.281, 'eval_steps_per_second': 12.281, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.63s/it]

{'eval_loss': 0.6846566796302795, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.943, 'eval_steps_per_second': 11.943, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.66s/it]

{'eval_loss': 0.6682614088058472, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.144, 'eval_steps_per_second': 12.144, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.98s/it]


{'train_runtime': 29.717, 'train_samples_per_second': 3.432, 'train_steps_per_second': 0.505, 'train_loss': 0.8787270863850911, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17488.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 148.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8427267670631409, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.563, 'eval_steps_per_second': 11.563, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.74s/it]

{'eval_loss': 0.6838108897209167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.636, 'eval_steps_per_second': 11.636, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.74s/it]

{'eval_loss': 0.6680456399917603, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1343, 'eval_samples_per_second': 7.443, 'eval_steps_per_second': 7.443, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


{'train_runtime': 30.7964, 'train_samples_per_second': 3.409, 'train_steps_per_second': 0.487, 'train_loss': 0.8776524861653646, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.83it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17501.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 152.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8427267670631409, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.109, 'eval_steps_per_second': 11.109, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.68s/it]

{'eval_loss': 0.6838108897209167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1293, 'eval_samples_per_second': 7.734, 'eval_steps_per_second': 7.734, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.70s/it]

{'eval_loss': 0.6680456399917603, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.507, 'eval_steps_per_second': 11.507, 'epoch': 3.0}


100%|██████████| 15/15 [00:29<00:00,  1.99s/it]


{'train_runtime': 29.8658, 'train_samples_per_second': 3.516, 'train_steps_per_second': 0.502, 'train_loss': 0.8776524861653646, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17147.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 144.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8427267670631409, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1342, 'eval_samples_per_second': 7.453, 'eval_steps_per_second': 7.453, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.71s/it]

{'eval_loss': 0.6838108897209167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.055, 'eval_steps_per_second': 12.055, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.73s/it]

{'eval_loss': 0.6680456399917603, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.252, 'eval_steps_per_second': 11.252, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.04s/it]


{'train_runtime': 30.5492, 'train_samples_per_second': 3.437, 'train_steps_per_second': 0.491, 'train_loss': 0.8776524861653646, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17113.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 129.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 487.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8427267670631409, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.075, 'eval_steps_per_second': 12.075, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:18<00:08,  1.68s/it]

{'eval_loss': 0.6838108897209167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.993, 'eval_steps_per_second': 10.993, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:28<00:00,  1.71s/it]

{'eval_loss': 0.6680456399917603, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.502, 'eval_steps_per_second': 11.502, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.01s/it]


{'train_runtime': 30.1366, 'train_samples_per_second': 3.484, 'train_steps_per_second': 0.498, 'train_loss': 0.8776524861653646, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 110.02it/s]
Casting the dataset: 100%|██████████| 35/35 [00:00<00:00, 17476.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 153.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8427267670631409, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1335, 'eval_samples_per_second': 7.492, 'eval_steps_per_second': 7.492, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.71s/it]

{'eval_loss': 0.6838108897209167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.091, 'eval_samples_per_second': 10.988, 'eval_steps_per_second': 10.988, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.74s/it]

{'eval_loss': 0.6680456399917603, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.215, 'eval_steps_per_second': 11.215, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.04s/it]


{'train_runtime': 30.6734, 'train_samples_per_second': 3.423, 'train_steps_per_second': 0.489, 'train_loss': 0.8776524861653646, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 99.40it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17984.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.73 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 143.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.8270094990730286, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1345, 'eval_samples_per_second': 7.434, 'eval_steps_per_second': 7.434, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.77s/it]

{'eval_loss': 0.655001699924469, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1403, 'eval_samples_per_second': 7.128, 'eval_steps_per_second': 7.128, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.75s/it]

{'eval_loss': 0.6531103253364563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.098, 'eval_samples_per_second': 10.2, 'eval_steps_per_second': 10.2, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


{'train_runtime': 30.7484, 'train_samples_per_second': 3.512, 'train_steps_per_second': 0.488, 'train_loss': 0.8910014470418294, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 70.61it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17997.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 136.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8270094990730286, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.603, 'eval_steps_per_second': 11.603, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.81s/it]

{'eval_loss': 0.655001699924469, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.80s/it]

{'eval_loss': 0.6531103253364563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1253, 'eval_samples_per_second': 7.98, 'eval_steps_per_second': 7.98, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.0329, 'train_samples_per_second': 3.48, 'train_steps_per_second': 0.483, 'train_loss': 0.8910014470418294, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 18001.30 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 133.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8270094990730286, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1293, 'eval_samples_per_second': 7.737, 'eval_steps_per_second': 7.737, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.75s/it]

{'eval_loss': 0.655001699924469, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.129, 'eval_samples_per_second': 7.753, 'eval_steps_per_second': 7.753, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.77s/it]

{'eval_loss': 0.6531103253364563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1295, 'eval_samples_per_second': 7.72, 'eval_steps_per_second': 7.72, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.06s/it]


{'train_runtime': 30.8346, 'train_samples_per_second': 3.503, 'train_steps_per_second': 0.486, 'train_loss': 0.8910014470418294, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.24it/s]
100%|██████████| 1/1 [00:00<00:00, 142.27it/s]
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17990.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 145.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8270094990730286, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1012, 'eval_samples_per_second': 9.878, 'eval_steps_per_second': 9.878, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.79s/it]

{'eval_loss': 0.655001699924469, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.845, 'eval_steps_per_second': 12.845, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.79s/it]

{'eval_loss': 0.6531103253364563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1344, 'eval_samples_per_second': 7.44, 'eval_steps_per_second': 7.44, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.05s/it]


{'train_runtime': 30.7877, 'train_samples_per_second': 3.508, 'train_steps_per_second': 0.487, 'train_loss': 0.8910014470418294, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.31it/s]
100%|██████████| 1/1 [00:00<00:00, 165.03it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 36/36 [00:00<00:00, 17670.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 132.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8270094990730286, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1242, 'eval_samples_per_second': 8.05, 'eval_steps_per_second': 8.05, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.75s/it]

{'eval_loss': 0.655001699924469, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1288, 'eval_samples_per_second': 7.763, 'eval_steps_per_second': 7.763, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.77s/it]

{'eval_loss': 0.6531103253364563, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1363, 'eval_samples_per_second': 7.338, 'eval_steps_per_second': 7.338, 'epoch': 3.0}


100%|██████████| 15/15 [00:30<00:00,  2.04s/it]


{'train_runtime': 30.6388, 'train_samples_per_second': 3.525, 'train_steps_per_second': 0.49, 'train_loss': 0.8910014470418294, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.28it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18499.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.02 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 134.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8289806842803955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.6594756841659546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.079, 'eval_samples_per_second': 12.658, 'eval_steps_per_second': 12.658, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.87s/it]

{'eval_loss': 0.6556064486503601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.5734, 'train_samples_per_second': 3.516, 'train_steps_per_second': 0.475, 'train_loss': 0.8884644190470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 36.70it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 12332.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 140.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8289806842803955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.556, 'eval_steps_per_second': 11.556, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.6594756841659546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.879, 'eval_steps_per_second': 11.879, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.82s/it]

{'eval_loss': 0.6556064486503601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.2549, 'train_samples_per_second': 3.551, 'train_steps_per_second': 0.48, 'train_loss': 0.8884644190470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.40it/s]
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18125.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 136.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8289806842803955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.205, 'eval_steps_per_second': 12.205, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.82s/it]

{'eval_loss': 0.6594756841659546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.926, 'eval_steps_per_second': 11.926, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.82s/it]

{'eval_loss': 0.6556064486503601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.377, 'eval_steps_per_second': 11.377, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.08s/it]


{'train_runtime': 31.1729, 'train_samples_per_second': 3.561, 'train_steps_per_second': 0.481, 'train_loss': 0.8884644190470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18514.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 137.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8289806842803955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.82s/it]

{'eval_loss': 0.6594756841659546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.82s/it]

{'eval_loss': 0.6556064486503601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.0438, 'train_samples_per_second': 3.576, 'train_steps_per_second': 0.483, 'train_loss': 0.8884644190470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
Casting the dataset: 100%|██████████| 37/37 [00:00<00:00, 18193.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 131.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8289806842803955, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.638, 'eval_steps_per_second': 11.638, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:08,  1.78s/it]

{'eval_loss': 0.6594756841659546, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.5, 'eval_steps_per_second': 11.5, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:29<00:00,  1.84s/it]

{'eval_loss': 0.6556064486503601, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.07s/it]


{'train_runtime': 31.052, 'train_samples_per_second': 3.575, 'train_steps_per_second': 0.483, 'train_loss': 0.8884644190470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 99.11it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18606.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 121.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8305840492248535, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.048, 'eval_steps_per_second': 12.048, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.6628474593162537, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.89s/it]

{'eval_loss': 0.6565964221954346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.422, 'eval_steps_per_second': 11.422, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.4885, 'train_samples_per_second': 3.62, 'train_steps_per_second': 0.476, 'train_loss': 0.8863821665445963, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18663.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 125.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8305840492248535, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.84s/it]

{'eval_loss': 0.6628474593162537, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.258, 'eval_steps_per_second': 12.258, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.88s/it]

{'eval_loss': 0.6565964221954346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.5337, 'train_samples_per_second': 3.615, 'train_steps_per_second': 0.476, 'train_loss': 0.8863821665445963, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18999.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 120.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8305840492248535, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.333, 'eval_steps_per_second': 12.333, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.88s/it]

{'eval_loss': 0.6628474593162537, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.916, 'eval_steps_per_second': 11.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.85s/it]

{'eval_loss': 0.6565964221954346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.5295, 'train_samples_per_second': 3.616, 'train_steps_per_second': 0.476, 'train_loss': 0.8863821665445963, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 19010.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 126.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8305840492248535, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.652, 'eval_steps_per_second': 11.652, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.83s/it]

{'eval_loss': 0.6628474593162537, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.294, 'eval_steps_per_second': 12.294, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.89s/it]

{'eval_loss': 0.6565964221954346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.503, 'eval_steps_per_second': 11.503, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.4756, 'train_samples_per_second': 3.622, 'train_steps_per_second': 0.477, 'train_loss': 0.8863821665445963, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 38/38 [00:00<00:00, 18999.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 127.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8305840492248535, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0932, 'eval_samples_per_second': 10.728, 'eval_steps_per_second': 10.728, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.86s/it]

{'eval_loss': 0.6628474593162537, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.88s/it]

{'eval_loss': 0.6565964221954346, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.538, 'eval_steps_per_second': 12.538, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


{'train_runtime': 31.5551, 'train_samples_per_second': 3.613, 'train_steps_per_second': 0.475, 'train_loss': 0.8863821665445963, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19492.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 130.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8311963081359863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.912, 'eval_steps_per_second': 11.912, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.92s/it]

{'eval_loss': 0.6655533313751221, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.006, 'eval_steps_per_second': 13.006, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.90s/it]

{'eval_loss': 0.6587689518928528, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.861, 'eval_steps_per_second': 12.861, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.13s/it]


{'train_runtime': 31.9039, 'train_samples_per_second': 3.667, 'train_steps_per_second': 0.47, 'train_loss': 0.8851441065470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19473.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 122.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8311963081359863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.097, 'eval_steps_per_second': 13.097, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.91s/it]

{'eval_loss': 0.6655533313751221, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.146, 'eval_steps_per_second': 12.146, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.92s/it]

{'eval_loss': 0.6587689518928528, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.685, 'eval_steps_per_second': 12.685, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.14s/it]


{'train_runtime': 32.1163, 'train_samples_per_second': 3.643, 'train_steps_per_second': 0.467, 'train_loss': 0.8851441065470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 13004.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 125.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8311963081359863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.182, 'eval_steps_per_second': 13.182, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:19<00:09,  1.92s/it]

{'eval_loss': 0.6655533313751221, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.511, 'eval_steps_per_second': 12.511, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.93s/it]

{'eval_loss': 0.6587689518928528, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.179, 'eval_steps_per_second': 13.179, 'epoch': 3.0}


100%|██████████| 15/15 [00:31<00:00,  2.13s/it]


{'train_runtime': 31.956, 'train_samples_per_second': 3.661, 'train_steps_per_second': 0.469, 'train_loss': 0.8851441065470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 161.92it/s]
100%|██████████| 1/1 [00:00<00:00, 140.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19499.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 122.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.8311963081359863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.052, 'eval_steps_per_second': 12.052, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.93s/it]

{'eval_loss': 0.6655533313751221, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.172, 'eval_steps_per_second': 13.172, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.95s/it]

{'eval_loss': 0.6587689518928528, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.086, 'eval_steps_per_second': 12.086, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.15s/it]


{'train_runtime': 32.2366, 'train_samples_per_second': 3.629, 'train_steps_per_second': 0.465, 'train_loss': 0.8851441065470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
Casting the dataset: 100%|██████████| 39/39 [00:00<00:00, 19085.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 126.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 281.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8311963081359863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.931, 'eval_steps_per_second': 12.931, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.93s/it]

{'eval_loss': 0.6655533313751221, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0757, 'eval_samples_per_second': 13.202, 'eval_steps_per_second': 13.202, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.94s/it]

{'eval_loss': 0.6587689518928528, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.234, 'eval_steps_per_second': 12.234, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.14s/it]


{'train_runtime': 32.0475, 'train_samples_per_second': 3.651, 'train_steps_per_second': 0.468, 'train_loss': 0.8851441065470378, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 13149.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 127.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8269400596618652, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.838, 'eval_steps_per_second': 12.838, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.99s/it]

{'eval_loss': 0.6562997102737427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.01s/it]

{'eval_loss': 0.6551523208618164, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 11.648, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.18s/it]


{'train_runtime': 32.7677, 'train_samples_per_second': 3.662, 'train_steps_per_second': 0.458, 'train_loss': 0.8908374150594075, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 140.02it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 13319.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 491.19 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 125.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 323.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8269400596618652, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.209, 'eval_steps_per_second': 12.209, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.97s/it]

{'eval_loss': 0.6562997102737427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.045, 'eval_steps_per_second': 12.045, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:30<00:00,  1.96s/it]

{'eval_loss': 0.6551523208618164, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.15s/it]


{'train_runtime': 32.2669, 'train_samples_per_second': 3.719, 'train_steps_per_second': 0.465, 'train_loss': 0.8908374150594075, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 139.00it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19996.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.95 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 123.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.8269400596618652, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.031, 'eval_steps_per_second': 12.031, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.98s/it]

{'eval_loss': 0.6562997102737427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.204, 'eval_steps_per_second': 12.204, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.99s/it]

{'eval_loss': 0.6551523208618164, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.265, 'eval_steps_per_second': 12.265, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.18s/it]


{'train_runtime': 32.6469, 'train_samples_per_second': 3.676, 'train_steps_per_second': 0.459, 'train_loss': 0.8908374150594075, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 140.86it/s]
100%|██████████| 1/1 [00:00<00:00, 164.72it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 20018.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 127.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8269400596618652, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.98s/it]

{'eval_loss': 0.6562997102737427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.013, 'eval_steps_per_second': 13.013, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  2.00s/it]

{'eval_loss': 0.6551523208618164, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.359, 'eval_steps_per_second': 12.359, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.17s/it]


{'train_runtime': 32.5832, 'train_samples_per_second': 3.683, 'train_steps_per_second': 0.46, 'train_loss': 0.8908374150594075, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 40/40 [00:00<00:00, 19989.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.39 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 124.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.8269400596618652, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.425, 'eval_steps_per_second': 11.425, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 10/15 [00:20<00:09,  1.99s/it]

{'eval_loss': 0.6562997102737427, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 15/15 [00:31<00:00,  1.99s/it]

{'eval_loss': 0.6551523208618164, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.289, 'eval_steps_per_second': 12.289, 'epoch': 3.0}


100%|██████████| 15/15 [00:32<00:00,  2.17s/it]


{'train_runtime': 32.5418, 'train_samples_per_second': 3.688, 'train_steps_per_second': 0.461, 'train_loss': 0.8908374150594075, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 49.76it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 200.02it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20511.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 122.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6928210258483887, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.58s/it]

{'eval_loss': 0.6481782793998718, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.104, 'eval_steps_per_second': 12.104, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.56s/it]

{'eval_loss': 0.5965183973312378, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.277, 'eval_steps_per_second': 12.277, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.93s/it]


{'train_runtime': 34.675, 'train_samples_per_second': 3.547, 'train_steps_per_second': 0.519, 'train_loss': 0.7876635127597384, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 19931.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 125.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4823688864707947, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.56s/it]

{'eval_loss': 0.4707438051700592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.147, 'eval_steps_per_second': 12.147, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:32<00:00,  1.53s/it]

{'eval_loss': 0.4494839310646057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.188, 'eval_steps_per_second': 13.188, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.90s/it]


{'train_runtime': 34.2587, 'train_samples_per_second': 3.59, 'train_steps_per_second': 0.525, 'train_loss': 0.6777676476372613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 181.58it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20481.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.07 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 126.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.4823688864707947, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0894, 'eval_samples_per_second': 11.183, 'eval_steps_per_second': 11.183, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.55s/it]

{'eval_loss': 0.4707438051700592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.55s/it]

{'eval_loss': 0.4494839310646057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.92s/it]


{'train_runtime': 34.4917, 'train_samples_per_second': 3.566, 'train_steps_per_second': 0.522, 'train_loss': 0.6777676476372613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 164.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 20136.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 122.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4823688864707947, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.854, 'eval_steps_per_second': 12.854, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.55s/it]

{'eval_loss': 0.4707438051700592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.55s/it]

{'eval_loss': 0.4494839310646057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.59, 'eval_steps_per_second': 11.59, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.92s/it]


{'train_runtime': 34.5683, 'train_samples_per_second': 3.558, 'train_steps_per_second': 0.521, 'train_loss': 0.6777676476372613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.80it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
Casting the dataset: 100%|██████████| 41/41 [00:00<00:00, 13657.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.54 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 124.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.4823688864707947, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.177, 'eval_steps_per_second': 12.177, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.58s/it]

{'eval_loss': 0.4707438051700592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0883, 'eval_samples_per_second': 11.325, 'eval_steps_per_second': 11.325, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.57s/it]

{'eval_loss': 0.4494839310646057, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.127, 'eval_steps_per_second': 12.127, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.94s/it]


{'train_runtime': 34.916, 'train_samples_per_second': 3.523, 'train_steps_per_second': 0.516, 'train_loss': 0.6777676476372613, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 21019.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 125.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4831085205078125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.61s/it]

{'eval_loss': 0.4695234000682831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.366, 'eval_steps_per_second': 12.366, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.63s/it]

{'eval_loss': 0.4477422833442688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.98s/it]


{'train_runtime': 35.5679, 'train_samples_per_second': 3.543, 'train_steps_per_second': 0.506, 'train_loss': 0.67483917872111, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20979.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 123.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4831085205078125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.442, 'eval_steps_per_second': 12.442, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.60s/it]

{'eval_loss': 0.4695234000682831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.61s/it]

{'eval_loss': 0.4477422833442688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.93, 'eval_steps_per_second': 11.93, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.93s/it]


{'train_runtime': 34.7079, 'train_samples_per_second': 3.63, 'train_steps_per_second': 0.519, 'train_loss': 0.67483917872111, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 200.02it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20994.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 122.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4831085205078125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.666, 'eval_steps_per_second': 12.666, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.58s/it]

{'eval_loss': 0.4695234000682831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.06, 'eval_steps_per_second': 12.06, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.63s/it]

{'eval_loss': 0.4477422833442688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.888, 'eval_steps_per_second': 11.888, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.93s/it]


{'train_runtime': 34.7641, 'train_samples_per_second': 3.624, 'train_steps_per_second': 0.518, 'train_loss': 0.67483917872111, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.73it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20505.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 119.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4831085205078125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.139, 'eval_steps_per_second': 12.139, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.62s/it]

{'eval_loss': 0.4695234000682831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0733, 'eval_samples_per_second': 13.647, 'eval_steps_per_second': 13.647, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.63s/it]

{'eval_loss': 0.4477422833442688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.96s/it]


{'train_runtime': 35.2861, 'train_samples_per_second': 3.571, 'train_steps_per_second': 0.51, 'train_loss': 0.67483917872111, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
Casting the dataset: 100%|██████████| 42/42 [00:00<00:00, 20991.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 116.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4831085205078125, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:21<00:09,  1.60s/it]

{'eval_loss': 0.4695234000682831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.909, 'eval_steps_per_second': 11.909, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.60s/it]

{'eval_loss': 0.4477422833442688, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.146, 'eval_steps_per_second': 12.146, 'epoch': 3.0}


100%|██████████| 18/18 [00:34<00:00,  1.94s/it]


{'train_runtime': 34.9792, 'train_samples_per_second': 3.602, 'train_steps_per_second': 0.515, 'train_loss': 0.67483917872111, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.98it/s]
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
100%|██████████| 1/1 [00:00<00:00, 141.81it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21501.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.90 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 119.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4839550852775574, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0929, 'eval_samples_per_second': 10.759, 'eval_steps_per_second': 10.759, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.67s/it]

{'eval_loss': 0.46966204047203064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0915, 'eval_samples_per_second': 10.929, 'eval_steps_per_second': 10.929, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.68s/it]

{'eval_loss': 0.4475756287574768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.775, 'eval_steps_per_second': 11.775, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.817, 'train_samples_per_second': 3.602, 'train_steps_per_second': 0.503, 'train_loss': 0.6741691695319282, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 163.32it/s]
100%|██████████| 1/1 [00:00<00:00, 165.09it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 14159.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 113.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4839550852775574, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0878, 'eval_samples_per_second': 11.385, 'eval_steps_per_second': 11.385, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.62s/it]

{'eval_loss': 0.46966204047203064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.924, 'eval_steps_per_second': 12.924, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:33<00:00,  1.66s/it]

{'eval_loss': 0.4475756287574768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0898, 'eval_samples_per_second': 11.132, 'eval_steps_per_second': 11.132, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.96s/it]


{'train_runtime': 35.3065, 'train_samples_per_second': 3.654, 'train_steps_per_second': 0.51, 'train_loss': 0.6741691695319282, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 14089.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 949.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 115.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4839550852775574, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1315, 'eval_samples_per_second': 7.605, 'eval_steps_per_second': 7.605, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.66s/it]

{'eval_loss': 0.46966204047203064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.677, 'eval_steps_per_second': 12.677, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.68s/it]

{'eval_loss': 0.4475756287574768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.8171, 'train_samples_per_second': 3.602, 'train_steps_per_second': 0.503, 'train_loss': 0.6741691695319282, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21499.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 121.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4839550852775574, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.239, 'eval_steps_per_second': 11.239, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.66s/it]

{'eval_loss': 0.46966204047203064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.67s/it]

{'eval_loss': 0.4475756287574768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.055, 'eval_steps_per_second': 12.055, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.8585, 'train_samples_per_second': 3.597, 'train_steps_per_second': 0.502, 'train_loss': 0.6741691695319282, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 43/43 [00:00<00:00, 21141.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 119.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.74 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4839550852775574, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:09,  1.65s/it]

{'eval_loss': 0.46966204047203064, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.767, 'eval_steps_per_second': 11.767, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.68s/it]

{'eval_loss': 0.4475756287574768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1273, 'eval_samples_per_second': 7.853, 'eval_steps_per_second': 7.853, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.98s/it]


{'train_runtime': 35.6759, 'train_samples_per_second': 3.616, 'train_steps_per_second': 0.505, 'train_loss': 0.6741691695319282, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 140.02it/s]
100%|██████████| 1/1 [00:00<00:00, 164.62it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21996.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 120.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.48460105061531067, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1052, 'eval_samples_per_second': 9.504, 'eval_steps_per_second': 9.504, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.70s/it]

{'eval_loss': 0.4692501127719879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1058, 'eval_samples_per_second': 9.453, 'eval_steps_per_second': 9.453, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.70s/it]

{'eval_loss': 0.446929931640625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1134, 'eval_samples_per_second': 8.821, 'eval_steps_per_second': 8.821, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.9017, 'train_samples_per_second': 3.677, 'train_steps_per_second': 0.501, 'train_loss': 0.6737055778503418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.83it/s]
100%|██████████| 1/1 [00:00<00:00, 200.05it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21980.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.20 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 116.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.48460105061531067, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0921, 'eval_samples_per_second': 10.856, 'eval_steps_per_second': 10.856, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.68s/it]

{'eval_loss': 0.4692501127719879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0991, 'eval_samples_per_second': 10.09, 'eval_steps_per_second': 10.09, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.74s/it]

{'eval_loss': 0.446929931640625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1244, 'eval_samples_per_second': 8.041, 'eval_steps_per_second': 8.041, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  2.00s/it]


{'train_runtime': 35.9415, 'train_samples_per_second': 3.673, 'train_steps_per_second': 0.501, 'train_loss': 0.6737055778503418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 140.66it/s]
100%|██████████| 1/1 [00:00<00:00, 123.92it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 14455.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 118.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.48460105061531067, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.555, 'eval_steps_per_second': 11.555, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.72s/it]

{'eval_loss': 0.4692501127719879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1236, 'eval_samples_per_second': 8.09, 'eval_steps_per_second': 8.09, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.71s/it]

{'eval_loss': 0.446929931640625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1275, 'eval_samples_per_second': 7.842, 'eval_steps_per_second': 7.842, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.00s/it]


{'train_runtime': 36.0309, 'train_samples_per_second': 3.664, 'train_steps_per_second': 0.5, 'train_loss': 0.6737055778503418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 21980.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 116.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.48460105061531067, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1367, 'eval_samples_per_second': 7.315, 'eval_steps_per_second': 7.315, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.72s/it]

{'eval_loss': 0.4692501127719879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1323, 'eval_samples_per_second': 7.559, 'eval_steps_per_second': 7.559, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.68s/it]

{'eval_loss': 0.446929931640625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1281, 'eval_samples_per_second': 7.804, 'eval_steps_per_second': 7.804, 'epoch': 3.0}


100%|██████████| 18/18 [00:35<00:00,  1.99s/it]


{'train_runtime': 35.8218, 'train_samples_per_second': 3.685, 'train_steps_per_second': 0.502, 'train_loss': 0.6737055778503418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
Casting the dataset: 100%|██████████| 44/44 [00:00<00:00, 14684.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 117.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.48460105061531067, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1063, 'eval_samples_per_second': 9.407, 'eval_steps_per_second': 9.407, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:22<00:10,  1.70s/it]

{'eval_loss': 0.4692501127719879, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1102, 'eval_samples_per_second': 9.071, 'eval_steps_per_second': 9.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:34<00:00,  1.77s/it]

{'eval_loss': 0.446929931640625, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.123, 'eval_samples_per_second': 8.128, 'eval_steps_per_second': 8.128, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.02s/it]


{'train_runtime': 36.3873, 'train_samples_per_second': 3.628, 'train_steps_per_second': 0.495, 'train_loss': 0.6737055778503418, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 11038.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 113.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.4848254323005676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.78s/it]

{'eval_loss': 0.469562828540802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.79s/it]

{'eval_loss': 0.447152316570282, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.793, 'eval_steps_per_second': 11.793, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.04s/it]


{'train_runtime': 36.6507, 'train_samples_per_second': 3.683, 'train_steps_per_second': 0.491, 'train_loss': 0.6733808517456055, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
100%|██████████| 1/1 [00:00<00:00, 164.49it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 14999.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 959.58 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 116.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4848254323005676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.035, 'eval_steps_per_second': 12.035, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.80s/it]

{'eval_loss': 0.469562828540802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.05, 'eval_steps_per_second': 12.05, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.81s/it]

{'eval_loss': 0.447152316570282, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.1414, 'train_samples_per_second': 3.635, 'train_steps_per_second': 0.485, 'train_loss': 0.6733808517456055, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.33it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 14998.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 111.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4848254323005676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.79s/it]

{'eval_loss': 0.469562828540802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.80s/it]

{'eval_loss': 0.447152316570282, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.718, 'eval_steps_per_second': 12.718, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.8867, 'train_samples_per_second': 3.66, 'train_steps_per_second': 0.488, 'train_loss': 0.6733808517456055, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 165.34it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 14993.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.95 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 113.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.4848254323005676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.359, 'eval_steps_per_second': 12.359, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.80s/it]

{'eval_loss': 0.469562828540802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.78s/it]

{'eval_loss': 0.447152316570282, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.376, 'eval_steps_per_second': 12.376, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.8473, 'train_samples_per_second': 3.664, 'train_steps_per_second': 0.489, 'train_loss': 0.6733808517456055, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 45/45 [00:00<00:00, 22517.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 109.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.4848254323005676, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.998, 'eval_steps_per_second': 11.998, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:10,  1.80s/it]

{'eval_loss': 0.469562828540802, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.642, 'eval_steps_per_second': 11.642, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.80s/it]

{'eval_loss': 0.447152316570282, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.205, 'eval_steps_per_second': 12.205, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.9113, 'train_samples_per_second': 3.657, 'train_steps_per_second': 0.488, 'train_loss': 0.6733808517456055, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 53.28it/s]
100%|██████████| 1/1 [00:00<00:00, 124.06it/s]
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22982.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 116.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4846160411834717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.044, 'eval_steps_per_second': 13.044, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.469713032245636, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.86s/it]

{'eval_loss': 0.4475519061088562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.691, 'eval_steps_per_second': 11.691, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.06s/it]


{'train_runtime': 37.1084, 'train_samples_per_second': 3.719, 'train_steps_per_second': 0.485, 'train_loss': 0.6735866864522299, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.93it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 15346.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 114.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4846160411834717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.646, 'eval_steps_per_second': 11.646, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.85s/it]

{'eval_loss': 0.469713032245636, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.86s/it]

{'eval_loss': 0.4475519061088562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0777, 'eval_samples_per_second': 12.863, 'eval_steps_per_second': 12.863, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.08s/it]


{'train_runtime': 37.4989, 'train_samples_per_second': 3.68, 'train_steps_per_second': 0.48, 'train_loss': 0.6735866864522299, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 123.77it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22977.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 106.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4846160411834717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.2, 'eval_steps_per_second': 13.2, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.469713032245636, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.011, 'eval_steps_per_second': 13.011, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.85s/it]

{'eval_loss': 0.4475519061088562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.19, 'eval_steps_per_second': 13.19, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.3179, 'train_samples_per_second': 3.698, 'train_steps_per_second': 0.482, 'train_loss': 0.6735866864522299, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 22996.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.95 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 115.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4846160411834717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.469713032245636, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0753, 'eval_samples_per_second': 13.276, 'eval_steps_per_second': 13.276, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.84s/it]

{'eval_loss': 0.4475519061088562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.182, 'eval_steps_per_second': 13.182, 'epoch': 3.0}


100%|██████████| 18/18 [00:36<00:00,  2.05s/it]


{'train_runtime': 36.9753, 'train_samples_per_second': 3.732, 'train_steps_per_second': 0.487, 'train_loss': 0.6735866864522299, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.46it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
Casting the dataset: 100%|██████████| 46/46 [00:00<00:00, 15166.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 114.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4846160411834717, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.84s/it]

{'eval_loss': 0.469713032245636, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.518, 'eval_steps_per_second': 11.518, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.86s/it]

{'eval_loss': 0.4475519061088562, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.35, 'eval_steps_per_second': 13.35, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2814, 'train_samples_per_second': 3.702, 'train_steps_per_second': 0.483, 'train_loss': 0.6735866864522299, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 164.24it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 15426.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 109.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 294.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 316.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.48454877734184265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.677, 'eval_steps_per_second': 12.677, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.90s/it]

{'eval_loss': 0.46917039155960083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.93s/it]

{'eval_loss': 0.44678473472595215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.11s/it]


{'train_runtime': 37.9883, 'train_samples_per_second': 3.712, 'train_steps_per_second': 0.474, 'train_loss': 0.6728912459479438, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
100%|██████████| 1/1 [00:00<00:00, 164.59it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 22951.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 106.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.48454877734184265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.228, 'eval_steps_per_second': 12.228, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.90s/it]

{'eval_loss': 0.46917039155960083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.51, 'eval_steps_per_second': 12.51, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.91s/it]

{'eval_loss': 0.44678473472595215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0777, 'eval_samples_per_second': 12.864, 'eval_steps_per_second': 12.864, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.869, 'train_samples_per_second': 3.723, 'train_steps_per_second': 0.475, 'train_loss': 0.6728912459479438, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 139.78it/s]
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 22975.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 103.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.48454877734184265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0936, 'eval_samples_per_second': 10.688, 'eval_steps_per_second': 10.688, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.87s/it]

{'eval_loss': 0.46917039155960083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0737, 'eval_samples_per_second': 13.571, 'eval_steps_per_second': 13.571, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.91s/it]

{'eval_loss': 0.44678473472595215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.09s/it]


{'train_runtime': 37.6784, 'train_samples_per_second': 3.742, 'train_steps_per_second': 0.478, 'train_loss': 0.6728912459479438, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.64it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 15650.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 113.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.48454877734184265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.369, 'eval_steps_per_second': 12.369, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.96s/it]

{'eval_loss': 0.46917039155960083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:37<00:00,  1.93s/it]

{'eval_loss': 0.44678473472595215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.14s/it]


{'train_runtime': 38.6049, 'train_samples_per_second': 3.652, 'train_steps_per_second': 0.466, 'train_loss': 0.6728912459479438, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
Casting the dataset: 100%|██████████| 47/47 [00:00<00:00, 15669.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 965.76 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 109.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.48454877734184265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.843, 'eval_steps_per_second': 12.843, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.87s/it]

{'eval_loss': 0.46917039155960083, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.375, 'eval_steps_per_second': 12.375, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:35<00:00,  1.86s/it]

{'eval_loss': 0.44678473472595215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.371, 'eval_steps_per_second': 12.371, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.07s/it]


{'train_runtime': 37.2929, 'train_samples_per_second': 3.781, 'train_steps_per_second': 0.483, 'train_loss': 0.6728912459479438, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
100%|██████████| 1/1 [00:00<00:00, 141.08it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 16003.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 107.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.48453837633132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.037, 'eval_steps_per_second': 13.037, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.91s/it]

{'eval_loss': 0.4691382646560669, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  2.00s/it]

{'eval_loss': 0.446831077337265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.13s/it]


{'train_runtime': 38.2924, 'train_samples_per_second': 3.761, 'train_steps_per_second': 0.47, 'train_loss': 0.6729899512396919, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.08it/s]
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 24010.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 488.33 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 108.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.48453837633132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.227, 'eval_steps_per_second': 12.227, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.94s/it]

{'eval_loss': 0.4691382646560669, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.378, 'eval_steps_per_second': 11.378, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.95s/it]

{'eval_loss': 0.446831077337265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.12s/it]


{'train_runtime': 38.2021, 'train_samples_per_second': 3.769, 'train_steps_per_second': 0.471, 'train_loss': 0.6729899512396919, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 162.94it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 15992.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 880.60 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 108.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.48453837633132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0868, 'eval_samples_per_second': 11.517, 'eval_steps_per_second': 11.517, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:24<00:11,  1.96s/it]

{'eval_loss': 0.4691382646560669, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0847, 'eval_samples_per_second': 11.8, 'eval_steps_per_second': 11.8, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.97s/it]

{'eval_loss': 0.446831077337265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0777, 'eval_samples_per_second': 12.872, 'eval_steps_per_second': 12.872, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.13s/it]


{'train_runtime': 38.3713, 'train_samples_per_second': 3.753, 'train_steps_per_second': 0.469, 'train_loss': 0.6729899512396919, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 15974.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 109.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.48453837633132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.93s/it]

{'eval_loss': 0.4691382646560669, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.992, 'eval_steps_per_second': 11.992, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.96s/it]

{'eval_loss': 0.446831077337265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.08, 'eval_steps_per_second': 12.08, 'epoch': 3.0}


100%|██████████| 18/18 [00:38<00:00,  2.12s/it]


{'train_runtime': 38.1929, 'train_samples_per_second': 3.77, 'train_steps_per_second': 0.471, 'train_loss': 0.6729899512396919, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.78it/s]
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
Casting the dataset: 100%|██████████| 48/48 [00:00<00:00, 23976.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 102.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.48453837633132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.893, 'eval_steps_per_second': 11.893, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 12/18 [00:23<00:11,  1.93s/it]

{'eval_loss': 0.4691382646560669, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 18/18 [00:36<00:00,  1.91s/it]

{'eval_loss': 0.446831077337265, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 3.0}


100%|██████████| 18/18 [00:37<00:00,  2.10s/it]


{'train_runtime': 37.7889, 'train_samples_per_second': 3.811, 'train_steps_per_second': 0.476, 'train_loss': 0.6729899512396919, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24507.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 105.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4763448238372803, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.23, 'eval_steps_per_second': 12.23, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.51s/it]

{'eval_loss': 0.4474191963672638, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.708, 'eval_steps_per_second': 11.708, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.55s/it]

{'eval_loss': 0.42050620913505554, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 3.0}


100%|██████████| 21/21 [00:39<00:00,  1.88s/it]


{'train_runtime': 39.5346, 'train_samples_per_second': 3.718, 'train_steps_per_second': 0.531, 'train_loss': 0.5964088894072033, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 139.46it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24498.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 103.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6536522507667542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.779, 'eval_steps_per_second': 11.779, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.53s/it]

{'eval_loss': 0.5314712524414062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.713, 'eval_steps_per_second': 11.713, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.55s/it]

{'eval_loss': 0.4951128661632538, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.093, 'eval_samples_per_second': 10.747, 'eval_steps_per_second': 10.747, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.91s/it]


{'train_runtime': 40.1817, 'train_samples_per_second': 3.658, 'train_steps_per_second': 0.523, 'train_loss': 0.5180143628801618, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.02it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 132.24it/s]
100%|██████████| 1/1 [00:00<00:00, 141.85it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 24466.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 102.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6536522507667542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.52s/it]

{'eval_loss': 0.5314712524414062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.513, 'eval_steps_per_second': 11.513, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:38<00:00,  1.52s/it]

{'eval_loss': 0.4951128661632538, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 3.0}


100%|██████████| 21/21 [00:39<00:00,  1.89s/it]


{'train_runtime': 39.7404, 'train_samples_per_second': 3.699, 'train_steps_per_second': 0.528, 'train_loss': 0.5180143628801618, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 82.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.01it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 16346.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.10 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 103.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6536522507667542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.02, 'eval_steps_per_second': 12.02, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.58s/it]

{'eval_loss': 0.5314712524414062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.59s/it]

{'eval_loss': 0.4951128661632538, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.95s/it]


{'train_runtime': 40.9954, 'train_samples_per_second': 3.586, 'train_steps_per_second': 0.512, 'train_loss': 0.5180143628801618, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 49/49 [00:00<00:00, 16099.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 104.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6536522507667542, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.078, 'eval_steps_per_second': 12.078, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:10,  1.57s/it]

{'eval_loss': 0.5314712524414062, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.217, 'eval_steps_per_second': 12.217, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.58s/it]

{'eval_loss': 0.4951128661632538, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.505, 'eval_steps_per_second': 11.505, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.93s/it]


{'train_runtime': 40.593, 'train_samples_per_second': 3.621, 'train_steps_per_second': 0.517, 'train_loss': 0.5180143628801618, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.79it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16334.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 870.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 101.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6570814251899719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.222, 'eval_steps_per_second': 12.222, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.62s/it]

{'eval_loss': 0.5334435701370239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.231, 'eval_steps_per_second': 12.231, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.64s/it]

{'eval_loss': 0.4972517192363739, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.979, 'eval_steps_per_second': 11.979, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.6781, 'train_samples_per_second': 3.599, 'train_steps_per_second': 0.504, 'train_loss': 0.5178167252313524, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16675.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 10433.59 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 101.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.6570814251899719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.953, 'eval_steps_per_second': 11.953, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.60s/it]

{'eval_loss': 0.5334435701370239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.209, 'eval_steps_per_second': 12.209, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.61s/it]

{'eval_loss': 0.4972517192363739, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.93s/it]


{'train_runtime': 40.6014, 'train_samples_per_second': 3.694, 'train_steps_per_second': 0.517, 'train_loss': 0.5178167252313524, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.01it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16409.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 100.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.6570814251899719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.198, 'eval_steps_per_second': 12.198, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.66s/it]

{'eval_loss': 0.5334435701370239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.65s/it]

{'eval_loss': 0.4972517192363739, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.00s/it]


{'train_runtime': 42.0991, 'train_samples_per_second': 3.563, 'train_steps_per_second': 0.499, 'train_loss': 0.5178167252313524, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 16655.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 100.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6570814251899719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.62s/it]

{'eval_loss': 0.5334435701370239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.508, 'eval_steps_per_second': 11.508, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.61s/it]

{'eval_loss': 0.4972517192363739, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.373, 'eval_steps_per_second': 12.373, 'epoch': 3.0}


100%|██████████| 21/21 [00:40<00:00,  1.94s/it]


{'train_runtime': 40.836, 'train_samples_per_second': 3.673, 'train_steps_per_second': 0.514, 'train_loss': 0.5178167252313524, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.09it/s]
Casting the dataset: 100%|██████████| 50/50 [00:00<00:00, 25004.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 103.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6570814251899719, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.206, 'eval_steps_per_second': 12.206, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.64s/it]

{'eval_loss': 0.5334435701370239, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.64s/it]

{'eval_loss': 0.4972517192363739, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.98s/it]


{'train_runtime': 41.5855, 'train_samples_per_second': 3.607, 'train_steps_per_second': 0.505, 'train_loss': 0.5178167252313524, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 25480.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 100.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6574869155883789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.913, 'eval_steps_per_second': 11.913, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.66s/it]

{'eval_loss': 0.5321081876754761, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.997, 'eval_steps_per_second': 10.997, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.67s/it]

{'eval_loss': 0.4964340329170227, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.624, 'eval_steps_per_second': 11.624, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.4293, 'train_samples_per_second': 3.693, 'train_steps_per_second': 0.507, 'train_loss': 0.5181046440487816, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 161.88it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.11it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 16995.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 486.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 101.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6574869155883789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.696, 'eval_steps_per_second': 11.696, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.66s/it]

{'eval_loss': 0.5321081876754761, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.903, 'eval_steps_per_second': 11.903, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.68s/it]

{'eval_loss': 0.4964340329170227, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.066, 'eval_steps_per_second': 12.066, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.99s/it]


{'train_runtime': 41.8052, 'train_samples_per_second': 3.66, 'train_steps_per_second': 0.502, 'train_loss': 0.5181046440487816, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.35it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 25036.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 98.72 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 328.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
Yo

{'eval_loss': 0.6574869155883789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.66s/it]

{'eval_loss': 0.5321081876754761, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.644, 'eval_steps_per_second': 11.644, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.62s/it]

{'eval_loss': 0.4964340329170227, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.013, 'eval_steps_per_second': 13.013, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.96s/it]


{'train_runtime': 41.1005, 'train_samples_per_second': 3.723, 'train_steps_per_second': 0.511, 'train_loss': 0.5181046440487816, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
100%|██████████| 1/1 [00:00<00:00, 167.18it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 16770.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 99.89 examples/s] 
Map: 100%|██████████| 1/1 [00:00<00:00, 246.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6574869155883789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.627, 'eval_steps_per_second': 11.627, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:25<00:11,  1.64s/it]

{'eval_loss': 0.5321081876754761, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0799, 'eval_samples_per_second': 12.522, 'eval_steps_per_second': 12.522, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:39<00:00,  1.68s/it]

{'eval_loss': 0.4964340329170227, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.358, 'eval_steps_per_second': 13.358, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.2713, 'train_samples_per_second': 3.707, 'train_steps_per_second': 0.509, 'train_loss': 0.5181046440487816, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
Casting the dataset: 100%|██████████| 51/51 [00:00<00:00, 17010.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 96.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6574869155883789, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.825, 'eval_steps_per_second': 12.825, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.65s/it]

{'eval_loss': 0.5321081876754761, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.763, 'eval_steps_per_second': 11.763, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.66s/it]

{'eval_loss': 0.4964340329170227, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.766, 'eval_steps_per_second': 11.766, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.3575, 'train_samples_per_second': 3.699, 'train_steps_per_second': 0.508, 'train_loss': 0.5181046440487816, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.00it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
100%|██████████| 1/1 [00:00<00:00, 52.13it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 17044.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 98.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 0.6576835513114929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.103, 'eval_samples_per_second': 9.71, 'eval_steps_per_second': 9.71, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.78s/it]

{'eval_loss': 0.5317812561988831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.641, 'eval_steps_per_second': 11.641, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.75s/it]

{'eval_loss': 0.4963381290435791, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1514, 'eval_samples_per_second': 6.606, 'eval_steps_per_second': 6.606, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.03s/it]


{'train_runtime': 42.6686, 'train_samples_per_second': 3.656, 'train_steps_per_second': 0.492, 'train_loss': 0.5173321678524926, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 25402.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 97.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6576835513114929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0981, 'eval_samples_per_second': 10.19, 'eval_steps_per_second': 10.19, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.72s/it]

{'eval_loss': 0.5317812561988831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1519, 'eval_samples_per_second': 6.582, 'eval_steps_per_second': 6.582, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.73s/it]

{'eval_loss': 0.4963381290435791, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1051, 'eval_samples_per_second': 9.515, 'eval_steps_per_second': 9.515, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.02s/it]


{'train_runtime': 42.3503, 'train_samples_per_second': 3.684, 'train_steps_per_second': 0.496, 'train_loss': 0.5173321678524926, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.11it/s]
100%|██████████| 1/1 [00:00<00:00, 61.80it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 17334.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.25 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 98.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 0.6576835513114929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1381, 'eval_samples_per_second': 7.24, 'eval_steps_per_second': 7.24, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.73s/it]

{'eval_loss': 0.5317812561988831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.131, 'eval_samples_per_second': 7.635, 'eval_steps_per_second': 7.635, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.76s/it]

{'eval_loss': 0.4963381290435791, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1306, 'eval_samples_per_second': 7.655, 'eval_steps_per_second': 7.655, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.02s/it]


{'train_runtime': 42.4596, 'train_samples_per_second': 3.674, 'train_steps_per_second': 0.495, 'train_loss': 0.5173321678524926, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.75it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 16949.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 97.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6576835513114929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1075, 'eval_samples_per_second': 9.299, 'eval_steps_per_second': 9.299, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.68s/it]

{'eval_loss': 0.5317812561988831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0794, 'eval_samples_per_second': 12.599, 'eval_steps_per_second': 12.599, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.68s/it]

{'eval_loss': 0.4963381290435791, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.23, 'eval_steps_per_second': 11.23, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.97s/it]


{'train_runtime': 41.409, 'train_samples_per_second': 3.767, 'train_steps_per_second': 0.507, 'train_loss': 0.5173321678524926, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
Casting the dataset: 100%|██████████| 52/52 [00:00<00:00, 17333.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.60 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 95.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6576835513114929, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.069, 'eval_steps_per_second': 12.069, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:11,  1.70s/it]

{'eval_loss': 0.5317812561988831, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1284, 'eval_samples_per_second': 7.786, 'eval_steps_per_second': 7.786, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.70s/it]

{'eval_loss': 0.4963381290435791, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1417, 'eval_samples_per_second': 7.055, 'eval_steps_per_second': 7.055, 'epoch': 3.0}


100%|██████████| 21/21 [00:41<00:00,  1.99s/it]


{'train_runtime': 41.7222, 'train_samples_per_second': 3.739, 'train_steps_per_second': 0.503, 'train_loss': 0.5173321678524926, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 17365.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 93.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6568081974983215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.447, 'eval_steps_per_second': 11.447, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.78s/it]

{'eval_loss': 0.5317721366882324, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0837, 'eval_samples_per_second': 11.943, 'eval_steps_per_second': 11.943, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.79s/it]

{'eval_loss': 0.49585801362991333, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.674, 'eval_steps_per_second': 12.674, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.01s/it]


{'train_runtime': 42.2173, 'train_samples_per_second': 3.766, 'train_steps_per_second': 0.497, 'train_loss': 0.5157569703601655, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 26498.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 97.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 280.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6568081974983215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.2, 'eval_steps_per_second': 12.2, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.77s/it]

{'eval_loss': 0.5317721366882324, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:40<00:00,  1.73s/it]

{'eval_loss': 0.49585801362991333, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.01s/it]


{'train_runtime': 42.2257, 'train_samples_per_second': 3.765, 'train_steps_per_second': 0.497, 'train_loss': 0.5157569703601655, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 165.22it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 17396.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 95.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 239.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 240.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6568081974983215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.191, 'eval_steps_per_second': 13.191, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.80s/it]

{'eval_loss': 0.5317721366882324, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.79s/it]

{'eval_loss': 0.49585801362991333, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.03s/it]


{'train_runtime': 42.6954, 'train_samples_per_second': 3.724, 'train_steps_per_second': 0.492, 'train_loss': 0.5157569703601655, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 17396.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 95.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6568081974983215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.218, 'eval_steps_per_second': 12.218, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.79s/it]

{'eval_loss': 0.5317721366882324, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.115, 'eval_samples_per_second': 8.696, 'eval_steps_per_second': 8.696, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.79s/it]

{'eval_loss': 0.49585801362991333, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.03s/it]


{'train_runtime': 42.6253, 'train_samples_per_second': 3.73, 'train_steps_per_second': 0.493, 'train_loss': 0.5157569703601655, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 61.83it/s]
Casting the dataset: 100%|██████████| 53/53 [00:00<00:00, 17653.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 95.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6568081974983215, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.528, 'eval_steps_per_second': 12.528, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.80s/it]

{'eval_loss': 0.5317721366882324, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.382, 'eval_steps_per_second': 12.382, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.81s/it]

{'eval_loss': 0.49585801362991333, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.347, 'eval_steps_per_second': 13.347, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.02s/it]


{'train_runtime': 42.5097, 'train_samples_per_second': 3.74, 'train_steps_per_second': 0.494, 'train_loss': 0.5157569703601655, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.31it/s]
100%|██████████| 1/1 [00:00<00:00, 141.99it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 17719.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 94.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.6578535437583923, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.934, 'eval_steps_per_second': 11.934, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.84s/it]

{'eval_loss': 0.5317894220352173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.85s/it]

{'eval_loss': 0.49588721990585327, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.707, 'eval_steps_per_second': 11.707, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.9294, 'train_samples_per_second': 3.774, 'train_steps_per_second': 0.489, 'train_loss': 0.515696298508417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.89it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.86it/s]
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 13363.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 94.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.6578535437583923, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:26<00:12,  1.81s/it]

{'eval_loss': 0.5317894220352173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.49588721990585327, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.903, 'eval_steps_per_second': 11.903, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.03s/it]


{'train_runtime': 42.6488, 'train_samples_per_second': 3.798, 'train_steps_per_second': 0.492, 'train_loss': 0.515696298508417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 18002.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 87.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6578535437583923, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.789, 'eval_steps_per_second': 11.789, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.84s/it]

{'eval_loss': 0.5317894220352173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.49588721990585327, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0767, 'eval_samples_per_second': 13.031, 'eval_steps_per_second': 13.031, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.9429, 'train_samples_per_second': 3.772, 'train_steps_per_second': 0.489, 'train_loss': 0.515696298508417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 13497.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.17 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 93.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6578535437583923, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.85s/it]

{'eval_loss': 0.5317894220352173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1146, 'eval_samples_per_second': 8.728, 'eval_steps_per_second': 8.728, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.85s/it]

{'eval_loss': 0.49588721990585327, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.566, 'eval_steps_per_second': 11.566, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.08s/it]


{'train_runtime': 43.6017, 'train_samples_per_second': 3.715, 'train_steps_per_second': 0.482, 'train_loss': 0.515696298508417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
Casting the dataset: 100%|██████████| 54/54 [00:00<00:00, 17665.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 95.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.6578535437583923, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.567, 'eval_steps_per_second': 11.567, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:12,  1.85s/it]

{'eval_loss': 0.5317894220352173, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0794, 'eval_samples_per_second': 12.589, 'eval_steps_per_second': 12.589, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.49588721990585327, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.375, 'eval_steps_per_second': 13.375, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.06s/it]


{'train_runtime': 43.3272, 'train_samples_per_second': 3.739, 'train_steps_per_second': 0.485, 'train_loss': 0.515696298508417, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.58it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 18312.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 92.98 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.657569169998169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.845, 'eval_steps_per_second': 12.845, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.89s/it]

{'eval_loss': 0.5316734910011292, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:41<00:00,  1.84s/it]

{'eval_loss': 0.49590542912483215, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.357, 'eval_steps_per_second': 12.357, 'epoch': 3.0}


100%|██████████| 21/21 [00:42<00:00,  2.04s/it]


{'train_runtime': 42.932, 'train_samples_per_second': 3.843, 'train_steps_per_second': 0.489, 'train_loss': 0.5155830383300781, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.67it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.77it/s]
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 18040.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 94.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.657569169998169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0749, 'eval_samples_per_second': 13.357, 'eval_steps_per_second': 13.357, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.90s/it]

{'eval_loss': 0.5316734910011292, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.378, 'eval_steps_per_second': 11.378, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.85s/it]

{'eval_loss': 0.49590542912483215, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.4035, 'train_samples_per_second': 3.802, 'train_steps_per_second': 0.484, 'train_loss': 0.5155830383300781, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 32.95it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 13544.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 92.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.657569169998169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.686, 'eval_steps_per_second': 12.686, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.92s/it]

{'eval_loss': 0.5316734910011292, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.91s/it]

{'eval_loss': 0.49590542912483215, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.69, 'eval_steps_per_second': 12.69, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.10s/it]


{'train_runtime': 44.1279, 'train_samples_per_second': 3.739, 'train_steps_per_second': 0.476, 'train_loss': 0.5155830383300781, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.09it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
100%|██████████| 1/1 [00:00<00:00, 165.21it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 18002.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 85.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.657569169998169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.96, 'eval_steps_per_second': 11.96, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.88s/it]

{'eval_loss': 0.5316734910011292, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.89s/it]

{'eval_loss': 0.49590542912483215, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.256, 'eval_steps_per_second': 12.256, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.07s/it]


{'train_runtime': 43.5421, 'train_samples_per_second': 3.789, 'train_steps_per_second': 0.482, 'train_loss': 0.5155830383300781, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.97it/s]
100%|██████████| 1/1 [00:00<00:00, 140.67it/s]
Casting the dataset: 100%|██████████| 55/55 [00:00<00:00, 13751.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 955.64 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 95.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.657569169998169, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.05, 'eval_steps_per_second': 12.05, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.92s/it]

{'eval_loss': 0.5316734910011292, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.099, 'eval_steps_per_second': 13.099, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.87s/it]

{'eval_loss': 0.49590542912483215, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.077, 'eval_steps_per_second': 12.077, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.09s/it]


{'train_runtime': 43.8657, 'train_samples_per_second': 3.761, 'train_steps_per_second': 0.479, 'train_loss': 0.5155830383300781, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 164.79it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 13849.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 91.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6567462086677551, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.94s/it]

{'eval_loss': 0.5316479802131653, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.93s/it]

{'eval_loss': 0.49545902013778687, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.381, 'eval_steps_per_second': 11.381, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.09s/it]


{'train_runtime': 43.858, 'train_samples_per_second': 3.831, 'train_steps_per_second': 0.479, 'train_loss': 0.5151106062389555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 18668.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 91.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6567462086677551, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.631, 'eval_steps_per_second': 11.631, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.95s/it]

{'eval_loss': 0.5316479802131653, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.636, 'eval_steps_per_second': 11.636, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.95s/it]

{'eval_loss': 0.49545902013778687, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.11s/it]


{'train_runtime': 44.2542, 'train_samples_per_second': 3.796, 'train_steps_per_second': 0.475, 'train_loss': 0.5151106062389555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 30.98it/s]
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 18669.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 91.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6567462086677551, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.368, 'eval_steps_per_second': 13.368, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.96s/it]

{'eval_loss': 0.5316479802131653, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.207, 'eval_steps_per_second': 12.207, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.96s/it]

{'eval_loss': 0.49545902013778687, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.11s/it]


{'train_runtime': 44.2779, 'train_samples_per_second': 3.794, 'train_steps_per_second': 0.474, 'train_loss': 0.5151106062389555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 18650.23 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 867.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 92.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.6567462086677551, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.911, 'eval_steps_per_second': 12.911, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:28<00:13,  1.96s/it]

{'eval_loss': 0.5316479802131653, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.089, 'eval_samples_per_second': 11.235, 'eval_steps_per_second': 11.235, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.98s/it]

{'eval_loss': 0.49545902013778687, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.792, 'eval_steps_per_second': 11.792, 'epoch': 3.0}


100%|██████████| 21/21 [00:44<00:00,  2.11s/it]


{'train_runtime': 44.2976, 'train_samples_per_second': 3.793, 'train_steps_per_second': 0.474, 'train_loss': 0.5151106062389555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 56/56 [00:00<00:00, 18481.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 91.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6567462086677551, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.262, 'eval_steps_per_second': 11.262, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 14/21 [00:27<00:13,  1.91s/it]

{'eval_loss': 0.5316479802131653, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0734, 'eval_samples_per_second': 13.618, 'eval_steps_per_second': 13.618, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 21/21 [00:42<00:00,  1.93s/it]

{'eval_loss': 0.49545902013778687, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.019, 'eval_steps_per_second': 12.019, 'epoch': 3.0}


100%|██████████| 21/21 [00:43<00:00,  2.08s/it]


{'train_runtime': 43.6864, 'train_samples_per_second': 3.846, 'train_steps_per_second': 0.481, 'train_loss': 0.5151106062389555, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.78it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.25it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 28495.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 88.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.6302197575569153, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.59s/it]

{'eval_loss': 0.49651390314102173, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.59s/it]

{'eval_loss': 0.45626431703567505, 'eval_precision': 0.6666666666666666, 'eval_recall': 0.3333333333333333, 'eval_f1': 0.4444444444444444, 'eval_accuracy': 0.9365079365079365, 'eval_span_f1': 0.4444444444444444, 'eval_runtime': 0.0929, 'eval_samples_per_second': 10.766, 'eval_steps_per_second': 10.766, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.8193, 'train_samples_per_second': 3.652, 'train_steps_per_second': 0.513, 'train_loss': 0.45597489674886066, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 18753.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 87.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5371047854423523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.56s/it]

{'eval_loss': 0.4440353512763977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.697, 'eval_steps_per_second': 12.697, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.54s/it]

{'eval_loss': 0.4166874587535858, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.541, 'train_samples_per_second': 3.674, 'train_steps_per_second': 0.516, 'train_loss': 0.4505122900009155, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 164.98it/s]
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 18998.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 92.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.5371047854423523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.123, 'eval_steps_per_second': 12.123, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.58s/it]

{'eval_loss': 0.4440353512763977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.367, 'eval_steps_per_second': 13.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.56s/it]

{'eval_loss': 0.4166874587535858, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.832, 'eval_steps_per_second': 11.832, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.92s/it]


{'train_runtime': 46.0213, 'train_samples_per_second': 3.716, 'train_steps_per_second': 0.521, 'train_loss': 0.4505122900009155, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
100%|██████████| 1/1 [00:00<00:00, 164.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 14101.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 92.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.5371047854423523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1336, 'eval_samples_per_second': 7.485, 'eval_steps_per_second': 7.485, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.51s/it]

{'eval_loss': 0.4440353512763977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.54s/it]

{'eval_loss': 0.4166874587535858, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.781, 'eval_steps_per_second': 11.781, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.91s/it]


{'train_runtime': 45.9398, 'train_samples_per_second': 3.722, 'train_steps_per_second': 0.522, 'train_loss': 0.4505122900009155, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.77it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 57/57 [00:00<00:00, 18612.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 92.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5371047854423523, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.237, 'eval_steps_per_second': 12.237, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.57s/it]

{'eval_loss': 0.4440353512763977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.53s/it]

{'eval_loss': 0.4166874587535858, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.854, 'eval_steps_per_second': 12.854, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.92s/it]


{'train_runtime': 46.1851, 'train_samples_per_second': 3.702, 'train_steps_per_second': 0.52, 'train_loss': 0.4505122900009155, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.25it/s]
100%|██████████| 1/1 [00:00<00:00, 164.99it/s]
100%|██████████| 1/1 [00:00<00:00, 165.11it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 19339.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 86.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5360758900642395, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.56s/it]

{'eval_loss': 0.44352391362190247, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.786, 'eval_steps_per_second': 11.786, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.56s/it]

{'eval_loss': 0.4161793887615204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.90s/it]


{'train_runtime': 45.5048, 'train_samples_per_second': 3.824, 'train_steps_per_second': 0.527, 'train_loss': 0.44879194100697833, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 139.85it/s]
100%|██████████| 1/1 [00:00<00:00, 164.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 19088.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 89.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.5360758900642395, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.61s/it]

{'eval_loss': 0.44352391362190247, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.071, 'eval_steps_per_second': 12.071, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.62s/it]

{'eval_loss': 0.4161793887615204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.358, 'eval_steps_per_second': 12.358, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.8593, 'train_samples_per_second': 3.713, 'train_steps_per_second': 0.512, 'train_loss': 0.44879194100697833, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 150.28it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 29029.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 91.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 281.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5360758900642395, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.643, 'eval_steps_per_second': 11.643, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.57s/it]

{'eval_loss': 0.44352391362190247, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.702, 'eval_steps_per_second': 11.702, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.60s/it]

{'eval_loss': 0.4161793887615204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.93s/it]


{'train_runtime': 46.2129, 'train_samples_per_second': 3.765, 'train_steps_per_second': 0.519, 'train_loss': 0.44879194100697833, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 164.32it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 19021.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.69 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 89.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5360758900642395, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.18, 'eval_steps_per_second': 13.18, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:12,  1.62s/it]

{'eval_loss': 0.44352391362190247, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.925, 'eval_steps_per_second': 11.925, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.61s/it]

{'eval_loss': 0.4161793887615204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.216, 'eval_steps_per_second': 12.216, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.8986, 'train_samples_per_second': 3.71, 'train_steps_per_second': 0.512, 'train_loss': 0.44879194100697833, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
Casting the dataset: 100%|██████████| 58/58 [00:00<00:00, 19327.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 89.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5360758900642395, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.505, 'eval_steps_per_second': 11.505, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.56s/it]

{'eval_loss': 0.44352391362190247, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.642, 'eval_steps_per_second': 11.642, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:44<00:00,  1.58s/it]

{'eval_loss': 0.4161793887615204, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.026, 'eval_steps_per_second': 13.026, 'epoch': 3.0}


100%|██████████| 24/24 [00:45<00:00,  1.92s/it]


{'train_runtime': 45.9853, 'train_samples_per_second': 3.784, 'train_steps_per_second': 0.522, 'train_loss': 0.44879194100697833, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.17it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 152.07it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 19671.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.75 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 90.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5365394949913025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.91, 'eval_steps_per_second': 11.91, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.62s/it]

{'eval_loss': 0.4434319734573364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.054, 'eval_steps_per_second': 12.054, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.41621047258377075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1334, 'eval_samples_per_second': 7.495, 'eval_steps_per_second': 7.495, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.95s/it]


{'train_runtime': 46.7056, 'train_samples_per_second': 3.79, 'train_steps_per_second': 0.514, 'train_loss': 0.4489647150039673, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.28it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 19666.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 90.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.5365394949913025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1426, 'eval_samples_per_second': 7.015, 'eval_steps_per_second': 7.015, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.69s/it]

{'eval_loss': 0.4434319734573364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0879, 'eval_samples_per_second': 11.381, 'eval_steps_per_second': 11.381, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.67s/it]

{'eval_loss': 0.41621047258377075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1385, 'eval_samples_per_second': 7.221, 'eval_steps_per_second': 7.221, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.98s/it]


{'train_runtime': 47.6348, 'train_samples_per_second': 3.716, 'train_steps_per_second': 0.504, 'train_loss': 0.4489647150039673, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 19671.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.44 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 90.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5365394949913025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1359, 'eval_samples_per_second': 7.357, 'eval_steps_per_second': 7.357, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.65s/it]

{'eval_loss': 0.4434319734573364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1393, 'eval_samples_per_second': 7.179, 'eval_steps_per_second': 7.179, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.64s/it]

{'eval_loss': 0.41621047258377075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1021, 'eval_samples_per_second': 9.795, 'eval_steps_per_second': 9.795, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.97s/it]


{'train_runtime': 47.2319, 'train_samples_per_second': 3.747, 'train_steps_per_second': 0.508, 'train_loss': 0.4489647150039673, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.35it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 164.61it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 19668.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.44 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 91.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 488.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5365394949913025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1315, 'eval_samples_per_second': 7.602, 'eval_steps_per_second': 7.602, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.67s/it]

{'eval_loss': 0.4434319734573364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0953, 'eval_samples_per_second': 10.499, 'eval_steps_per_second': 10.499, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.41621047258377075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.074, 'eval_steps_per_second': 12.074, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.97s/it]


{'train_runtime': 47.2852, 'train_samples_per_second': 3.743, 'train_steps_per_second': 0.508, 'train_loss': 0.4489647150039673, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.31it/s]
100%|██████████| 1/1 [00:00<00:00, 162.90it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
Casting the dataset: 100%|██████████| 59/59 [00:00<00:00, 19352.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 90.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5365394949913025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.906, 'eval_steps_per_second': 11.906, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:29<00:12,  1.61s/it]

{'eval_loss': 0.4434319734573364, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.364, 'eval_steps_per_second': 11.364, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:45<00:00,  1.65s/it]

{'eval_loss': 0.41621047258377075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.489, 'eval_steps_per_second': 11.489, 'epoch': 3.0}


100%|██████████| 24/24 [00:46<00:00,  1.94s/it]


{'train_runtime': 46.5264, 'train_samples_per_second': 3.804, 'train_steps_per_second': 0.516, 'train_loss': 0.4489647150039673, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.28it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 19604.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 89.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You

{'eval_loss': 0.5367103219032288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1389, 'eval_samples_per_second': 7.199, 'eval_steps_per_second': 7.199, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.70s/it]

{'eval_loss': 0.4432161748409271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.63, 'eval_steps_per_second': 11.63, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.73s/it]

{'eval_loss': 0.4162678122520447, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1325, 'eval_samples_per_second': 7.549, 'eval_steps_per_second': 7.549, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  2.00s/it]


{'train_runtime': 47.9849, 'train_samples_per_second': 3.751, 'train_steps_per_second': 0.5, 'train_loss': 0.44885416825612384, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 165.04it/s]
100%|██████████| 1/1 [00:00<00:00, 164.39it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 20004.63 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 89.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 238.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 317.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5367103219032288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0951, 'eval_samples_per_second': 10.517, 'eval_steps_per_second': 10.517, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.72s/it]

{'eval_loss': 0.4432161748409271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.287, 'eval_steps_per_second': 11.287, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.72s/it]

{'eval_loss': 0.4162678122520447, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1283, 'eval_samples_per_second': 7.792, 'eval_steps_per_second': 7.792, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  1.99s/it]


{'train_runtime': 47.8695, 'train_samples_per_second': 3.76, 'train_steps_per_second': 0.501, 'train_loss': 0.44885416825612384, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 151.94it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 19995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 89.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5367103219032288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1323, 'eval_samples_per_second': 7.557, 'eval_steps_per_second': 7.557, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:13,  1.74s/it]

{'eval_loss': 0.4432161748409271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1295, 'eval_samples_per_second': 7.724, 'eval_steps_per_second': 7.724, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.69s/it]

{'eval_loss': 0.4162678122520447, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.764, 'eval_steps_per_second': 11.764, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.01s/it]


{'train_runtime': 48.1238, 'train_samples_per_second': 3.74, 'train_steps_per_second': 0.499, 'train_loss': 0.44885416825612384, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.84it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 19646.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 89.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5367103219032288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1041, 'eval_samples_per_second': 9.607, 'eval_steps_per_second': 9.607, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:13,  1.72s/it]

{'eval_loss': 0.4432161748409271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1546, 'eval_samples_per_second': 6.469, 'eval_steps_per_second': 6.469, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:46<00:00,  1.72s/it]

{'eval_loss': 0.4162678122520447, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1298, 'eval_samples_per_second': 7.704, 'eval_steps_per_second': 7.704, 'epoch': 3.0}


100%|██████████| 24/24 [00:47<00:00,  2.00s/it]


{'train_runtime': 47.9766, 'train_samples_per_second': 3.752, 'train_steps_per_second': 0.5, 'train_loss': 0.44885416825612384, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.89it/s]
100%|██████████| 1/1 [00:00<00:00, 165.23it/s]
Casting the dataset: 100%|██████████| 60/60 [00:00<00:00, 20006.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 88.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5367103219032288, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1044, 'eval_samples_per_second': 9.579, 'eval_steps_per_second': 9.579, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.75s/it]

{'eval_loss': 0.4432161748409271, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1278, 'eval_samples_per_second': 7.823, 'eval_steps_per_second': 7.823, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.74s/it]

{'eval_loss': 0.4162678122520447, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1242, 'eval_samples_per_second': 8.053, 'eval_steps_per_second': 8.053, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.393, 'train_samples_per_second': 3.72, 'train_steps_per_second': 0.496, 'train_loss': 0.44885416825612384, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 164.87it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 20312.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.60 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 87.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 311.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.5368999242782593, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.86, 'eval_steps_per_second': 12.86, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.79s/it]

{'eval_loss': 0.443089097738266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.79s/it]

{'eval_loss': 0.4161166846752167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.66, 'eval_steps_per_second': 11.66, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.6818, 'train_samples_per_second': 3.759, 'train_steps_per_second': 0.493, 'train_loss': 0.44884395599365234, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.12it/s]
100%|██████████| 1/1 [00:00<00:00, 124.03it/s]
100%|██████████| 1/1 [00:00<00:00, 162.48it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 20323.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 957.60 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 86.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5368999242782593, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.131, 'eval_steps_per_second': 12.131, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.81s/it]

{'eval_loss': 0.443089097738266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.928, 'eval_steps_per_second': 11.928, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.81s/it]

{'eval_loss': 0.4161166846752167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.362, 'eval_steps_per_second': 12.362, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.05s/it]


{'train_runtime': 49.1126, 'train_samples_per_second': 3.726, 'train_steps_per_second': 0.489, 'train_loss': 0.44884395599365234, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 30458.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 963.99 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 85.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5368999242782593, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.069, 'eval_steps_per_second': 12.069, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.80s/it]

{'eval_loss': 0.443089097738266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.78s/it]

{'eval_loss': 0.4161166846752167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.01, 'eval_steps_per_second': 13.01, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.7022, 'train_samples_per_second': 3.758, 'train_steps_per_second': 0.493, 'train_loss': 0.44884395599365234, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 141.22it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 28368.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 85.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 325.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5368999242782593, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0793, 'eval_samples_per_second': 12.606, 'eval_steps_per_second': 12.606, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.79s/it]

{'eval_loss': 0.443089097738266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.072, 'eval_steps_per_second': 12.072, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.82s/it]

{'eval_loss': 0.4161166846752167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.04s/it]


{'train_runtime': 48.9819, 'train_samples_per_second': 3.736, 'train_steps_per_second': 0.49, 'train_loss': 0.44884395599365234, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
100%|██████████| 1/1 [00:00<00:00, 124.05it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 61/61 [00:00<00:00, 15100.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 82.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.5368999242782593, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.917, 'eval_steps_per_second': 11.917, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.78s/it]

{'eval_loss': 0.443089097738266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.858, 'eval_steps_per_second': 12.858, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.78s/it]

{'eval_loss': 0.4161166846752167, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.742, 'eval_steps_per_second': 11.742, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.5937, 'train_samples_per_second': 3.766, 'train_steps_per_second': 0.494, 'train_loss': 0.44884395599365234, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 139.85it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 20424.67 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.86 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 84.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5367809534072876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.057, 'eval_steps_per_second': 12.057, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.84s/it]

{'eval_loss': 0.4428485929965973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.016, 'eval_steps_per_second': 13.016, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.84s/it]

{'eval_loss': 0.41565659642219543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.508, 'eval_steps_per_second': 11.508, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.06s/it]


{'train_runtime': 49.3422, 'train_samples_per_second': 3.77, 'train_steps_per_second': 0.486, 'train_loss': 0.4486600160598755, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 164.52it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 20653.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 489.82 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 84.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5367809534072876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.81s/it]

{'eval_loss': 0.4428485929965973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.686, 'eval_steps_per_second': 12.686, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.83s/it]

{'eval_loss': 0.41565659642219543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.185, 'eval_steps_per_second': 13.185, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.02s/it]


{'train_runtime': 48.5129, 'train_samples_per_second': 3.834, 'train_steps_per_second': 0.495, 'train_loss': 0.4486600160598755, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 43.06it/s]
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 30361.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 86.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 0.5367809534072876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.012, 'eval_steps_per_second': 13.012, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.85s/it]

{'eval_loss': 0.4428485929965973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.848, 'eval_steps_per_second': 12.848, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.86s/it]

{'eval_loss': 0.41565659642219543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.209, 'eval_steps_per_second': 12.209, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.06s/it]


{'train_runtime': 49.4918, 'train_samples_per_second': 3.758, 'train_steps_per_second': 0.485, 'train_loss': 0.4486600160598755, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 165.14it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 15309.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 84.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5367809534072876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.064, 'eval_steps_per_second': 12.064, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.84s/it]

{'eval_loss': 0.4428485929965973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.185, 'eval_steps_per_second': 13.185, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.83s/it]

{'eval_loss': 0.41565659642219543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.168, 'eval_steps_per_second': 12.168, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.04s/it]


{'train_runtime': 48.9002, 'train_samples_per_second': 3.804, 'train_steps_per_second': 0.491, 'train_loss': 0.4486600160598755, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 62/62 [00:00<00:00, 20303.47 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 85.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5367809534072876, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.996, 'eval_steps_per_second': 11.996, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:14,  1.86s/it]

{'eval_loss': 0.4428485929965973, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.081, 'eval_samples_per_second': 12.343, 'eval_steps_per_second': 12.343, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.81s/it]

{'eval_loss': 0.41565659642219543, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.361, 'eval_steps_per_second': 13.361, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.05s/it]


{'train_runtime': 49.1333, 'train_samples_per_second': 3.786, 'train_steps_per_second': 0.488, 'train_loss': 0.4486600160598755, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.17it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 12477.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 84.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5367742776870728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.291, 'eval_steps_per_second': 12.291, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.84s/it]

{'eval_loss': 0.44272705912590027, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.89s/it]

{'eval_loss': 0.41536495089530945, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.189, 'eval_steps_per_second': 13.189, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.7614, 'train_samples_per_second': 3.876, 'train_steps_per_second': 0.492, 'train_loss': 0.4485888083775838, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
100%|██████████| 1/1 [00:00<00:00, 164.37it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 15756.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 83.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5367742776870728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.846, 'eval_steps_per_second': 12.846, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.91s/it]

{'eval_loss': 0.44272705912590027, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.905, 'eval_steps_per_second': 12.905, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.90s/it]

{'eval_loss': 0.41536495089530945, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.38, 'eval_steps_per_second': 12.38, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.07s/it]


{'train_runtime': 49.6595, 'train_samples_per_second': 3.806, 'train_steps_per_second': 0.483, 'train_loss': 0.4485888083775838, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.47it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 20996.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 76.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5367742776870728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0739, 'eval_samples_per_second': 13.54, 'eval_steps_per_second': 13.54, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:30<00:14,  1.85s/it]

{'eval_loss': 0.44272705912590027, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:47<00:00,  1.89s/it]

{'eval_loss': 0.41536495089530945, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.376, 'eval_steps_per_second': 13.376, 'epoch': 3.0}


100%|██████████| 24/24 [00:48<00:00,  2.03s/it]


{'train_runtime': 48.7587, 'train_samples_per_second': 3.876, 'train_steps_per_second': 0.492, 'train_loss': 0.4485888083775838, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.23it/s]
100%|██████████| 1/1 [00:00<00:00, 164.91it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 15550.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 81.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5367742776870728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0811, 'eval_samples_per_second': 12.333, 'eval_steps_per_second': 12.333, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:32<00:15,  1.90s/it]

{'eval_loss': 0.44272705912590027, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.89, 'eval_steps_per_second': 11.89, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:49<00:00,  1.92s/it]

{'eval_loss': 0.41536495089530945, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.194, 'eval_steps_per_second': 13.194, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.10s/it]


{'train_runtime': 50.3376, 'train_samples_per_second': 3.755, 'train_steps_per_second': 0.477, 'train_loss': 0.4485888083775838, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 30.92it/s]
100%|██████████| 1/1 [00:00<00:00, 141.53it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
Casting the dataset: 100%|██████████| 63/63 [00:00<00:00, 20998.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 83.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5367742776870728, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.768, 'eval_steps_per_second': 11.768, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.90s/it]

{'eval_loss': 0.44272705912590027, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.89s/it]

{'eval_loss': 0.41536495089530945, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.204, 'eval_steps_per_second': 12.204, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.07s/it]


{'train_runtime': 49.6712, 'train_samples_per_second': 3.805, 'train_steps_per_second': 0.483, 'train_loss': 0.4485888083775838, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 164.76it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 15825.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 80.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.5372291207313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:32<00:15,  1.96s/it]

{'eval_loss': 0.4429137706756592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.999, 'eval_steps_per_second': 10.999, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:49<00:00,  1.97s/it]

{'eval_loss': 0.4155379831790924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.695, 'eval_steps_per_second': 12.695, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.11s/it]


{'train_runtime': 50.5876, 'train_samples_per_second': 3.795, 'train_steps_per_second': 0.474, 'train_loss': 0.44872772693634033, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.06it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.77it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 15768.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 79.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5372291207313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.94s/it]

{'eval_loss': 0.4429137706756592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.298, 'eval_steps_per_second': 12.298, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.94s/it]

{'eval_loss': 0.4155379831790924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 3.0}


100%|██████████| 24/24 [00:49<00:00,  2.07s/it]


{'train_runtime': 49.7192, 'train_samples_per_second': 3.862, 'train_steps_per_second': 0.483, 'train_loss': 0.44872772693634033, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 148.26it/s]
100%|██████████| 1/1 [00:00<00:00, 139.54it/s]
100%|██████████| 1/1 [00:00<00:00, 165.20it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 15793.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 80.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 281.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5372291207313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.215, 'eval_steps_per_second': 12.215, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:32<00:15,  1.94s/it]

{'eval_loss': 0.4429137706756592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.027, 'eval_steps_per_second': 13.027, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.95s/it]

{'eval_loss': 0.4155379831790924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.234, 'eval_steps_per_second': 12.234, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.10s/it]


{'train_runtime': 50.3195, 'train_samples_per_second': 3.816, 'train_steps_per_second': 0.477, 'train_loss': 0.44872772693634033, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.86it/s]
100%|██████████| 1/1 [00:00<00:00, 140.36it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 15813.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 80.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.5372291207313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.874, 'eval_steps_per_second': 11.874, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:32<00:15,  1.94s/it]

{'eval_loss': 0.4429137706756592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.065, 'eval_steps_per_second': 12.065, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.94s/it]

{'eval_loss': 0.4155379831790924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0752, 'eval_samples_per_second': 13.29, 'eval_steps_per_second': 13.29, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.09s/it]


{'train_runtime': 50.2497, 'train_samples_per_second': 3.821, 'train_steps_per_second': 0.478, 'train_loss': 0.44872772693634033, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 140.66it/s]
Casting the dataset: 100%|██████████| 64/64 [00:00<00:00, 20826.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 81.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.5372291207313538, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.967, 'eval_steps_per_second': 11.967, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 16/24 [00:31<00:15,  1.92s/it]

{'eval_loss': 0.4429137706756592, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 24/24 [00:48<00:00,  1.98s/it]

{'eval_loss': 0.4155379831790924, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.152, 'eval_steps_per_second': 12.152, 'epoch': 3.0}


100%|██████████| 24/24 [00:50<00:00,  2.08s/it]


{'train_runtime': 50.0261, 'train_samples_per_second': 3.838, 'train_steps_per_second': 0.48, 'train_loss': 0.44872772693634033, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 16114.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 941.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 80.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.5262506604194641, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.04, 'eval_steps_per_second': 12.04, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:13,  1.55s/it]

{'eval_loss': 0.41880902647972107, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.894, 'eval_steps_per_second': 11.894, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.53s/it]

{'eval_loss': 0.39437392354011536, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.88, 'eval_steps_per_second': 11.88, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.5942, 'train_samples_per_second': 3.779, 'train_steps_per_second': 0.523, 'train_loss': 0.39998835104483144, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21663.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.71 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 79.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4946677088737488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.982, 'eval_steps_per_second': 11.982, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.56s/it]

{'eval_loss': 0.4177524447441101, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0888, 'eval_samples_per_second': 11.264, 'eval_steps_per_second': 11.264, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.54s/it]

{'eval_loss': 0.4010585844516754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.848, 'eval_steps_per_second': 12.848, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.93s/it]


{'train_runtime': 52.121, 'train_samples_per_second': 3.741, 'train_steps_per_second': 0.518, 'train_loss': 0.4143273388897931, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.80it/s]
100%|██████████| 1/1 [00:00<00:00, 79.13it/s]
100%|██████████| 1/1 [00:00<00:00, 82.44it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21664.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 80.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4946677088737488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:13,  1.55s/it]

{'eval_loss': 0.4177524447441101, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.854, 'eval_steps_per_second': 11.854, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.53s/it]

{'eval_loss': 0.4010585844516754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.152, 'eval_steps_per_second': 12.152, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.6986, 'train_samples_per_second': 3.772, 'train_steps_per_second': 0.522, 'train_loss': 0.4143273388897931, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21287.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 952.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 80.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4946677088737488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:14,  1.57s/it]

{'eval_loss': 0.4177524447441101, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.067, 'eval_steps_per_second': 12.067, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.52s/it]

{'eval_loss': 0.4010585844516754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0784, 'eval_samples_per_second': 12.758, 'eval_steps_per_second': 12.758, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.89s/it]


{'train_runtime': 51.104, 'train_samples_per_second': 3.816, 'train_steps_per_second': 0.528, 'train_loss': 0.4143273388897931, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
Casting the dataset: 100%|██████████| 65/65 [00:00<00:00, 21376.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 490.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 81.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4946677088737488, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.776, 'eval_steps_per_second': 11.776, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:32<00:13,  1.53s/it]

{'eval_loss': 0.4177524447441101, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.143, 'eval_steps_per_second': 12.143, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:49<00:00,  1.53s/it]

{'eval_loss': 0.4010585844516754, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.36, 'eval_steps_per_second': 12.36, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.90s/it]


{'train_runtime': 51.3401, 'train_samples_per_second': 3.798, 'train_steps_per_second': 0.526, 'train_loss': 0.4143273388897931, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.87it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 21648.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 79.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 314.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4943303167819977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.59s/it]

{'eval_loss': 0.41750189661979675, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.367, 'eval_steps_per_second': 12.367, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.56s/it]

{'eval_loss': 0.400675892829895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 3.0}


100%|██████████| 27/27 [00:51<00:00,  1.91s/it]


{'train_runtime': 51.4562, 'train_samples_per_second': 3.848, 'train_steps_per_second': 0.525, 'train_loss': 0.41350576612684464, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 164.66it/s]
100%|██████████| 1/1 [00:00<00:00, 130.35it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 16313.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 79.89 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.4943303167819977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.891, 'eval_steps_per_second': 11.891, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.41750189661979675, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.137, 'eval_steps_per_second': 12.137, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.61s/it]

{'eval_loss': 0.400675892829895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


{'train_runtime': 52.3809, 'train_samples_per_second': 3.78, 'train_steps_per_second': 0.515, 'train_loss': 0.41350576612684464, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 66.20it/s]
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 16496.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 80.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4943303167819977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.059, 'eval_steps_per_second': 12.059, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.61s/it]

{'eval_loss': 0.41750189661979675, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.59s/it]

{'eval_loss': 0.400675892829895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.842, 'eval_steps_per_second': 11.842, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.6058, 'train_samples_per_second': 3.764, 'train_steps_per_second': 0.513, 'train_loss': 0.41350576612684464, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.48it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 16501.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 962.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 960.23 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 80.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4943303167819977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.687, 'eval_steps_per_second': 12.687, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.63s/it]

{'eval_loss': 0.41750189661979675, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.019, 'eval_steps_per_second': 13.019, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.63s/it]

{'eval_loss': 0.400675892829895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.636, 'eval_steps_per_second': 11.636, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.95s/it]


{'train_runtime': 52.6656, 'train_samples_per_second': 3.76, 'train_steps_per_second': 0.513, 'train_loss': 0.41350576612684464, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 109.15it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.55it/s]
Casting the dataset: 100%|██████████| 66/66 [00:00<00:00, 22026.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 958.26 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 79.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4943303167819977, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.198, 'eval_steps_per_second': 13.198, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.60s/it]

{'eval_loss': 0.41750189661979675, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.79, 'eval_steps_per_second': 11.79, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:50<00:00,  1.58s/it]

{'eval_loss': 0.400675892829895, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.94s/it]


{'train_runtime': 52.2446, 'train_samples_per_second': 3.79, 'train_steps_per_second': 0.517, 'train_loss': 0.41350576612684464, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 16582.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 78.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.49455544352531433, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0843, 'eval_samples_per_second': 11.858, 'eval_steps_per_second': 11.858, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.67s/it]

{'eval_loss': 0.41717976331710815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1344, 'eval_samples_per_second': 7.438, 'eval_steps_per_second': 7.438, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.68s/it]

{'eval_loss': 0.4002384543418884, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1451, 'eval_samples_per_second': 6.89, 'eval_steps_per_second': 6.89, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.6668, 'train_samples_per_second': 3.745, 'train_steps_per_second': 0.503, 'train_loss': 0.41322782304551864, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.49it/s]
100%|██████████| 1/1 [00:00<00:00, 76.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 16569.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 78.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to 

{'eval_loss': 0.49455544352531433, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.101, 'eval_samples_per_second': 9.898, 'eval_steps_per_second': 9.898, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:14,  1.65s/it]

{'eval_loss': 0.41717976331710815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.115, 'eval_steps_per_second': 11.115, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.64s/it]

{'eval_loss': 0.4002384543418884, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.686, 'eval_steps_per_second': 12.686, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.9895, 'train_samples_per_second': 3.793, 'train_steps_per_second': 0.51, 'train_loss': 0.41322782304551864, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
100%|██████████| 1/1 [00:00<00:00, 165.10it/s]
100%|██████████| 1/1 [00:00<00:00, 141.16it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 16753.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 937.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 79.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.49455544352531433, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0873, 'eval_samples_per_second': 11.45, 'eval_steps_per_second': 11.45, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.70s/it]

{'eval_loss': 0.41717976331710815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0944, 'eval_samples_per_second': 10.591, 'eval_steps_per_second': 10.591, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.71s/it]

{'eval_loss': 0.4002384543418884, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.1746, 'train_samples_per_second': 3.71, 'train_steps_per_second': 0.498, 'train_loss': 0.41322782304551864, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 110.24it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 16586.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 77.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49455544352531433, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1339, 'eval_samples_per_second': 7.47, 'eval_steps_per_second': 7.47, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.65s/it]

{'eval_loss': 0.41717976331710815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1293, 'eval_samples_per_second': 7.731, 'eval_steps_per_second': 7.731, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.65s/it]

{'eval_loss': 0.4002384543418884, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1354, 'eval_samples_per_second': 7.386, 'eval_steps_per_second': 7.386, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.855, 'train_samples_per_second': 3.803, 'train_steps_per_second': 0.511, 'train_loss': 0.41322782304551864, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 151.04it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 67/67 [00:00<00:00, 16532.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 79.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49455544352531433, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.141, 'eval_samples_per_second': 7.09, 'eval_steps_per_second': 7.09, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.69s/it]

{'eval_loss': 0.41717976331710815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1369, 'eval_samples_per_second': 7.303, 'eval_steps_per_second': 7.303, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.69s/it]

{'eval_loss': 0.4002384543418884, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1319, 'eval_samples_per_second': 7.58, 'eval_steps_per_second': 7.58, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.00s/it]


{'train_runtime': 54.0979, 'train_samples_per_second': 3.715, 'train_steps_per_second': 0.499, 'train_loss': 0.41322782304551864, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 110.24it/s]
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 16852.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 78.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4940871298313141, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1345, 'eval_samples_per_second': 7.435, 'eval_steps_per_second': 7.435, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.71s/it]

{'eval_loss': 0.41687074303627014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1031, 'eval_samples_per_second': 9.7, 'eval_steps_per_second': 9.7, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.71s/it]

{'eval_loss': 0.3999747037887573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1336, 'eval_samples_per_second': 7.487, 'eval_steps_per_second': 7.487, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.5174, 'train_samples_per_second': 3.812, 'train_steps_per_second': 0.505, 'train_loss': 0.41289601502595125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 17007.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 78.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use i

{'eval_loss': 0.4940871298313141, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.576, 'eval_steps_per_second': 11.576, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.71s/it]

{'eval_loss': 0.41687074303627014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1628, 'eval_samples_per_second': 6.144, 'eval_steps_per_second': 6.144, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.74s/it]

{'eval_loss': 0.3999747037887573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1319, 'eval_samples_per_second': 7.583, 'eval_steps_per_second': 7.583, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.99s/it]


{'train_runtime': 53.6649, 'train_samples_per_second': 3.801, 'train_steps_per_second': 0.503, 'train_loss': 0.41289601502595125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.42it/s]
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 17013.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.43 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 78.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4940871298313141, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1263, 'eval_samples_per_second': 7.918, 'eval_steps_per_second': 7.918, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:33<00:14,  1.64s/it]

{'eval_loss': 0.41687074303627014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.901, 'eval_steps_per_second': 11.901, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:51<00:00,  1.68s/it]

{'eval_loss': 0.3999747037887573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.245, 'eval_steps_per_second': 11.245, 'epoch': 3.0}


100%|██████████| 27/27 [00:52<00:00,  1.96s/it]


{'train_runtime': 52.793, 'train_samples_per_second': 3.864, 'train_steps_per_second': 0.511, 'train_loss': 0.41289601502595125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 16791.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 77.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4940871298313141, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1302, 'eval_samples_per_second': 7.678, 'eval_steps_per_second': 7.678, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.73s/it]

{'eval_loss': 0.41687074303627014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.298, 'eval_steps_per_second': 11.298, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.75s/it]

{'eval_loss': 0.3999747037887573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1283, 'eval_samples_per_second': 7.793, 'eval_steps_per_second': 7.793, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


{'train_runtime': 54.5009, 'train_samples_per_second': 3.743, 'train_steps_per_second': 0.495, 'train_loss': 0.41289601502595125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 157.98it/s]
100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
Casting the dataset: 100%|██████████| 68/68 [00:00<00:00, 16853.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 78.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.4940871298313141, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1334, 'eval_samples_per_second': 7.498, 'eval_steps_per_second': 7.498, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.71s/it]

{'eval_loss': 0.41687074303627014, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.092, 'eval_samples_per_second': 10.869, 'eval_steps_per_second': 10.869, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.69s/it]

{'eval_loss': 0.3999747037887573, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.631, 'eval_steps_per_second': 10.631, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  1.98s/it]


{'train_runtime': 53.519, 'train_samples_per_second': 3.812, 'train_steps_per_second': 0.504, 'train_loss': 0.41289601502595125, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 119.43it/s]
100%|██████████| 1/1 [00:00<00:00, 141.33it/s]
100%|██████████| 1/1 [00:00<00:00, 140.93it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 17250.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 77.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.49416449666023254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.995, 'eval_steps_per_second': 11.995, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.80s/it]

{'eval_loss': 0.4166446626186371, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.78s/it]

{'eval_loss': 0.3996977210044861, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.378, 'eval_steps_per_second': 12.378, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.6772, 'train_samples_per_second': 3.786, 'train_steps_per_second': 0.494, 'train_loss': 0.41271983252631295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.72it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 17103.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 78.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 319.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.49416449666023254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.431, 'eval_steps_per_second': 12.431, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.79s/it]

{'eval_loss': 0.4166446626186371, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.2, 'eval_steps_per_second': 13.2, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.76s/it]

{'eval_loss': 0.3996977210044861, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.229, 'eval_steps_per_second': 12.229, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.2724, 'train_samples_per_second': 3.814, 'train_steps_per_second': 0.497, 'train_loss': 0.41271983252631295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.66it/s]
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 17252.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 75.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.49416449666023254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.097, 'eval_samples_per_second': 10.308, 'eval_steps_per_second': 10.308, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.78s/it]

{'eval_loss': 0.4166446626186371, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.225, 'eval_steps_per_second': 12.225, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.77s/it]

{'eval_loss': 0.3996977210044861, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.932, 'eval_steps_per_second': 11.932, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.00s/it]


{'train_runtime': 54.0263, 'train_samples_per_second': 3.831, 'train_steps_per_second': 0.5, 'train_loss': 0.41271983252631295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 141.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 16974.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 77.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49416449666023254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.732, 'eval_steps_per_second': 11.732, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:15,  1.77s/it]

{'eval_loss': 0.4166446626186371, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.197, 'eval_steps_per_second': 12.197, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.80s/it]

{'eval_loss': 0.3996977210044861, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.859, 'eval_steps_per_second': 12.859, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.2566, 'train_samples_per_second': 3.815, 'train_steps_per_second': 0.498, 'train_loss': 0.41271983252631295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 164.43it/s]
Casting the dataset: 100%|██████████| 69/69 [00:00<00:00, 13668.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 77.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49416449666023254, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.79s/it]

{'eval_loss': 0.4166446626186371, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.028, 'eval_steps_per_second': 13.028, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.81s/it]

{'eval_loss': 0.3996977210044861, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.8316, 'train_samples_per_second': 3.775, 'train_steps_per_second': 0.492, 'train_loss': 0.41271983252631295, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 99.17it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 17501.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 77.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able 

{'eval_loss': 0.49351930618286133, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.37, 'eval_steps_per_second': 12.37, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.80s/it]

{'eval_loss': 0.41656824946403503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.927, 'eval_steps_per_second': 11.927, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.81s/it]

{'eval_loss': 0.3995647132396698, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 3.0}


100%|██████████| 27/27 [00:53<00:00,  2.00s/it]


{'train_runtime': 53.8785, 'train_samples_per_second': 3.898, 'train_steps_per_second': 0.501, 'train_loss': 0.41273325460928456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.58it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 149.90it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 23318.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.09 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 77.34 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.49351930618286133, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.846, 'eval_steps_per_second': 12.846, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.85s/it]

{'eval_loss': 0.41656824946403503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.84s/it]

{'eval_loss': 0.3995647132396698, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


{'train_runtime': 55.2195, 'train_samples_per_second': 3.803, 'train_steps_per_second': 0.489, 'train_loss': 0.41273325460928456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.97it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 99.28it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 17486.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 76.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49351930618286133, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.226, 'eval_steps_per_second': 12.226, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.83s/it]

{'eval_loss': 0.41656824946403503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.431, 'eval_steps_per_second': 12.431, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.84s/it]

{'eval_loss': 0.3995647132396698, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.919, 'eval_steps_per_second': 11.919, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.8154, 'train_samples_per_second': 3.831, 'train_steps_per_second': 0.493, 'train_loss': 0.41273325460928456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.38it/s]
100%|██████████| 1/1 [00:00<00:00, 123.91it/s]
100%|██████████| 1/1 [00:00<00:00, 161.94it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 17497.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 944.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 76.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49351930618286133, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.835, 'eval_steps_per_second': 11.835, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.83s/it]

{'eval_loss': 0.41656824946403503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.929, 'eval_steps_per_second': 11.929, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.81s/it]

{'eval_loss': 0.3995647132396698, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.008, 'eval_steps_per_second': 13.008, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.874, 'train_samples_per_second': 3.827, 'train_steps_per_second': 0.492, 'train_loss': 0.41273325460928456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 141.17it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 70/70 [00:00<00:00, 17317.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.95 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 76.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.49351930618286133, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.044, 'eval_steps_per_second': 12.044, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:34<00:16,  1.84s/it]

{'eval_loss': 0.41656824946403503, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0759, 'eval_samples_per_second': 13.183, 'eval_steps_per_second': 13.183, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:52<00:00,  1.83s/it]

{'eval_loss': 0.3995647132396698, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.01s/it]


{'train_runtime': 54.3067, 'train_samples_per_second': 3.867, 'train_steps_per_second': 0.497, 'train_loss': 0.41273325460928456, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 17071.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.26 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 73.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4932607114315033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.88s/it]

{'eval_loss': 0.4163036644458771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.376, 'eval_steps_per_second': 12.376, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.84s/it]

{'eval_loss': 0.39928632974624634, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.634, 'eval_steps_per_second': 11.634, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.03s/it]


{'train_runtime': 54.8951, 'train_samples_per_second': 3.88, 'train_steps_per_second': 0.492, 'train_loss': 0.41266006893581814, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.35it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 17449.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 76.31 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4932607114315033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.87s/it]

{'eval_loss': 0.4163036644458771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0761, 'eval_samples_per_second': 13.141, 'eval_steps_per_second': 13.141, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.85s/it]

{'eval_loss': 0.39928632974624634, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.076, 'eval_samples_per_second': 13.153, 'eval_steps_per_second': 13.153, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.0615, 'train_samples_per_second': 3.868, 'train_steps_per_second': 0.49, 'train_loss': 0.41266006893581814, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 26.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 17762.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1003.66 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 76.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4932607114315033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0738, 'eval_samples_per_second': 13.55, 'eval_steps_per_second': 13.55, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.86s/it]

{'eval_loss': 0.4163036644458771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.286, 'eval_steps_per_second': 12.286, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.83s/it]

{'eval_loss': 0.39928632974624634, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.075, 'eval_steps_per_second': 12.075, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


{'train_runtime': 54.6554, 'train_samples_per_second': 3.897, 'train_steps_per_second': 0.494, 'train_loss': 0.41266006893581814, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.40it/s]
100%|██████████| 1/1 [00:00<00:00, 141.78it/s]
100%|██████████| 1/1 [00:00<00:00, 164.94it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 17596.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 71/71 [00:01<00:00, 70.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.4932607114315033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.67, 'eval_steps_per_second': 12.67, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.88s/it]

{'eval_loss': 0.4163036644458771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.231, 'eval_steps_per_second': 12.231, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.83s/it]

{'eval_loss': 0.39928632974624634, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.363, 'eval_steps_per_second': 13.363, 'epoch': 3.0}


100%|██████████| 27/27 [00:54<00:00,  2.02s/it]


{'train_runtime': 54.6629, 'train_samples_per_second': 3.897, 'train_steps_per_second': 0.494, 'train_loss': 0.41266006893581814, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.25it/s]
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
Casting the dataset: 100%|██████████| 71/71 [00:00<00:00, 17550.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 964.21 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 74.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 315.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.22 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4932607114315033, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.286, 'eval_steps_per_second': 12.286, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:16,  1.85s/it]

{'eval_loss': 0.4163036644458771, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:53<00:00,  1.91s/it]

{'eval_loss': 0.39928632974624634, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.04s/it]


{'train_runtime': 55.1251, 'train_samples_per_second': 3.864, 'train_steps_per_second': 0.49, 'train_loss': 0.41266006893581814, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 165.08it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 17824.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 936.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Map: 100%|██████████| 72/72 [00:01<00:00, 70.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.49329042434692383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.787, 'eval_steps_per_second': 11.787, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.92s/it]

{'eval_loss': 0.4161894917488098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.92, 'eval_steps_per_second': 11.92, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.92s/it]

{'eval_loss': 0.39905256032943726, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.078, 'eval_steps_per_second': 12.078, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.06s/it]


{'train_runtime': 55.5349, 'train_samples_per_second': 3.889, 'train_steps_per_second': 0.486, 'train_loss': 0.41256162855360246, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.90it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.53it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 17787.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 75.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.49329042434692383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.191, 'eval_steps_per_second': 12.191, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.93s/it]

{'eval_loss': 0.4161894917488098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.376, 'eval_steps_per_second': 12.376, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.95s/it]

{'eval_loss': 0.39905256032943726, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.783, 'eval_steps_per_second': 11.783, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.07s/it]


{'train_runtime': 55.9329, 'train_samples_per_second': 3.862, 'train_steps_per_second': 0.483, 'train_loss': 0.41256162855360246, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.32it/s]
100%|██████████| 1/1 [00:00<00:00, 139.54it/s]
100%|██████████| 1/1 [00:00<00:00, 140.94it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 18008.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Map: 100%|██████████| 72/72 [00:01<00:00, 71.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 334.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.49329042434692383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.381, 'eval_steps_per_second': 12.381, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.94s/it]

{'eval_loss': 0.4161894917488098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.194, 'eval_steps_per_second': 12.194, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.91s/it]

{'eval_loss': 0.39905256032943726, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.06s/it]


{'train_runtime': 55.7284, 'train_samples_per_second': 3.876, 'train_steps_per_second': 0.484, 'train_loss': 0.41256162855360246, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.28it/s]
100%|██████████| 1/1 [00:00<00:00, 166.66it/s]
100%|██████████| 1/1 [00:00<00:00, 123.61it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 17845.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 74.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.49329042434692383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.837, 'eval_steps_per_second': 12.837, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.95s/it]

{'eval_loss': 0.4161894917488098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.90s/it]

{'eval_loss': 0.39905256032943726, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.05s/it]


{'train_runtime': 55.3952, 'train_samples_per_second': 3.899, 'train_steps_per_second': 0.487, 'train_loss': 0.41256162855360246, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 110.17it/s]
100%|██████████| 1/1 [00:00<00:00, 141.19it/s]
100%|██████████| 1/1 [00:00<00:00, 140.82it/s]
Casting the dataset: 100%|██████████| 72/72 [00:00<00:00, 17844.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 73.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.49329042434692383, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.789, 'eval_steps_per_second': 11.789, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 18/27 [00:35<00:17,  1.94s/it]

{'eval_loss': 0.4161894917488098, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.797, 'eval_steps_per_second': 11.797, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 27/27 [00:54<00:00,  1.93s/it]

{'eval_loss': 0.39905256032943726, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0778, 'eval_samples_per_second': 12.859, 'eval_steps_per_second': 12.859, 'epoch': 3.0}


100%|██████████| 27/27 [00:55<00:00,  2.07s/it]


{'train_runtime': 55.9553, 'train_samples_per_second': 3.86, 'train_steps_per_second': 0.483, 'train_loss': 0.41256162855360246, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.37it/s]
100%|██████████| 1/1 [00:00<00:00, 124.11it/s]
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 24337.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 956.29 examples/s]
Map: 100%|██████████| 73/73 [00:01<00:00, 72.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4745744466781616, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.502, 'eval_steps_per_second': 11.502, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.58s/it]

{'eval_loss': 0.40231356024742126, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.19, 'eval_steps_per_second': 12.19, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.57s/it]

{'eval_loss': 0.38950031995773315, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.07, 'eval_steps_per_second': 12.07, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.95s/it]


{'train_runtime': 58.3815, 'train_samples_per_second': 3.751, 'train_steps_per_second': 0.514, 'train_loss': 0.3726764361063639, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.31it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 14469.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 73/73 [00:01<00:00, 71.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.5889418125152588, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.854, 'eval_steps_per_second': 11.854, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.54s/it]

{'eval_loss': 0.43423202633857727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.244, 'eval_steps_per_second': 11.244, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.55s/it]

{'eval_loss': 0.3972094655036926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 11.648, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.6842, 'train_samples_per_second': 3.797, 'train_steps_per_second': 0.52, 'train_loss': 0.4498135566711426, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 70.54it/s]
100%|██████████| 1/1 [00:00<00:00, 70.67it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 17979.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 73/73 [00:01<00:00, 72.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5889418125152588, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.57s/it]

{'eval_loss': 0.43423202633857727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.508, 'eval_steps_per_second': 11.508, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.57s/it]

{'eval_loss': 0.3972094655036926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0798, 'eval_samples_per_second': 12.529, 'eval_steps_per_second': 12.529, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.1947, 'train_samples_per_second': 3.763, 'train_steps_per_second': 0.516, 'train_loss': 0.4498135566711426, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 18022.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 73/73 [00:01<00:00, 72.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.76 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5889418125152588, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.364, 'eval_steps_per_second': 11.364, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.53s/it]

{'eval_loss': 0.43423202633857727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.075, 'eval_steps_per_second': 12.075, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.54s/it]

{'eval_loss': 0.3972094655036926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.852, 'eval_steps_per_second': 11.852, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.4573, 'train_samples_per_second': 3.812, 'train_steps_per_second': 0.522, 'train_loss': 0.4498135566711426, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 142.51it/s]
Casting the dataset: 100%|██████████| 73/73 [00:00<00:00, 17998.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 73/73 [00:01<00:00, 72.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.5889418125152588, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.495, 'eval_steps_per_second': 11.495, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:35<00:15,  1.52s/it]

{'eval_loss': 0.43423202633857727, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.402, 'eval_steps_per_second': 12.402, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:54<00:00,  1.52s/it]

{'eval_loss': 0.3972094655036926, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.14, 'eval_steps_per_second': 12.14, 'epoch': 3.0}


100%|██████████| 30/30 [00:56<00:00,  1.87s/it]


{'train_runtime': 56.1416, 'train_samples_per_second': 3.901, 'train_steps_per_second': 0.534, 'train_loss': 0.4498135566711426, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 14705.00 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 74/74 [00:01<00:00, 72.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5875974893569946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.923, 'eval_steps_per_second': 11.923, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.59s/it]

{'eval_loss': 0.43319642543792725, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1154, 'eval_samples_per_second': 8.663, 'eval_steps_per_second': 8.663, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.59s/it]

{'eval_loss': 0.3964597284793854, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0889, 'eval_samples_per_second': 11.247, 'eval_steps_per_second': 11.247, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.4795, 'train_samples_per_second': 3.862, 'train_steps_per_second': 0.522, 'train_loss': 0.4475014050801595, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
100%|██████████| 1/1 [00:00<00:00, 164.54it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 18287.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 74/74 [00:01<00:00, 72.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5875974893569946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.073, 'eval_steps_per_second': 12.073, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.59s/it]

{'eval_loss': 0.43319642543792725, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.925, 'eval_steps_per_second': 12.925, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.58s/it]

{'eval_loss': 0.3964597284793854, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0827, 'eval_samples_per_second': 12.089, 'eval_steps_per_second': 12.089, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.7297, 'train_samples_per_second': 3.846, 'train_steps_per_second': 0.52, 'train_loss': 0.4475014050801595, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.55it/s]
100%|██████████| 1/1 [00:00<00:00, 166.62it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 24697.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 74/74 [00:01<00:00, 72.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5875974893569946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0858, 'eval_samples_per_second': 11.653, 'eval_steps_per_second': 11.653, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:15,  1.55s/it]

{'eval_loss': 0.43319642543792725, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:55<00:00,  1.59s/it]

{'eval_loss': 0.3964597284793854, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.015, 'eval_steps_per_second': 13.015, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.90s/it]


{'train_runtime': 57.0634, 'train_samples_per_second': 3.89, 'train_steps_per_second': 0.526, 'train_loss': 0.4475014050801595, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 141.80it/s]
100%|██████████| 1/1 [00:00<00:00, 140.92it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 18199.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Map: 100%|██████████| 74/74 [00:01<00:00, 71.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5875974893569946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.692, 'eval_steps_per_second': 12.692, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.58s/it]

{'eval_loss': 0.43319642543792725, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0828, 'eval_samples_per_second': 12.083, 'eval_steps_per_second': 12.083, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.60s/it]

{'eval_loss': 0.3964597284793854, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.799, 'eval_steps_per_second': 11.799, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.5917, 'train_samples_per_second': 3.855, 'train_steps_per_second': 0.521, 'train_loss': 0.4475014050801595, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
100%|██████████| 1/1 [00:00<00:00, 164.68it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 74/74 [00:00<00:00, 18286.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 74/74 [00:01<00:00, 71.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5875974893569946, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.134, 'eval_steps_per_second': 12.134, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:15,  1.58s/it]

{'eval_loss': 0.43319642543792725, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.513, 'eval_steps_per_second': 11.513, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.57s/it]

{'eval_loss': 0.3964597284793854, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.221, 'eval_steps_per_second': 12.221, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.5948, 'train_samples_per_second': 3.855, 'train_steps_per_second': 0.521, 'train_loss': 0.4475014050801595, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 165.18it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 18750.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Map: 100%|██████████| 75/75 [00:01<00:00, 70.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5885934233665466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1371, 'eval_samples_per_second': 7.293, 'eval_steps_per_second': 7.293, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:36<00:16,  1.61s/it]

{'eval_loss': 0.43217286467552185, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.63s/it]

{'eval_loss': 0.39579272270202637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1343, 'eval_samples_per_second': 7.445, 'eval_steps_per_second': 7.445, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.92s/it]


{'train_runtime': 57.7378, 'train_samples_per_second': 3.897, 'train_steps_per_second': 0.52, 'train_loss': 0.44696203867594403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.17it/s]
100%|██████████| 1/1 [00:00<00:00, 165.12it/s]
100%|██████████| 1/1 [00:00<00:00, 141.11it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 18761.42 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 75/75 [00:01<00:00, 71.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5885934233665466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.785, 'eval_steps_per_second': 11.785, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.62s/it]

{'eval_loss': 0.43217286467552185, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0795, 'eval_samples_per_second': 12.572, 'eval_steps_per_second': 12.572, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.61s/it]

{'eval_loss': 0.39579272270202637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.673, 'eval_steps_per_second': 12.673, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.91s/it]


{'train_runtime': 57.4317, 'train_samples_per_second': 3.918, 'train_steps_per_second': 0.522, 'train_loss': 0.44696203867594403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 151.41it/s]
100%|██████████| 1/1 [00:00<00:00, 165.16it/s]
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 18379.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 75/75 [00:01<00:00, 71.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 273.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5885934233665466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.048, 'eval_steps_per_second': 12.048, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.61s/it]

{'eval_loss': 0.43217286467552185, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0785, 'eval_samples_per_second': 12.736, 'eval_steps_per_second': 12.736, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.62s/it]

{'eval_loss': 0.39579272270202637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.088, 'eval_samples_per_second': 11.367, 'eval_steps_per_second': 11.367, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.1127, 'train_samples_per_second': 3.872, 'train_steps_per_second': 0.516, 'train_loss': 0.44696203867594403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.27it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 18485.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 75/75 [00:01<00:00, 71.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5885934233665466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.086, 'eval_samples_per_second': 11.627, 'eval_steps_per_second': 11.627, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]

{'eval_loss': 0.43217286467552185, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0956, 'eval_samples_per_second': 10.459, 'eval_steps_per_second': 10.459, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.60s/it]

{'eval_loss': 0.39579272270202637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.207, 'eval_steps_per_second': 12.207, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.2717, 'train_samples_per_second': 3.861, 'train_steps_per_second': 0.515, 'train_loss': 0.44696203867594403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.50it/s]
100%|██████████| 1/1 [00:00<00:00, 141.46it/s]
100%|██████████| 1/1 [00:00<00:00, 165.06it/s]
Casting the dataset: 100%|██████████| 75/75 [00:00<00:00, 18481.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 954.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Map: 100%|██████████| 75/75 [00:01<00:00, 70.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5885934233665466, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.509, 'eval_steps_per_second': 11.509, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.64s/it]

{'eval_loss': 0.43217286467552185, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0839, 'eval_samples_per_second': 11.924, 'eval_steps_per_second': 11.924, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.62s/it]

{'eval_loss': 0.39579272270202637, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 3.0}


100%|██████████| 30/30 [00:57<00:00,  1.93s/it]


{'train_runtime': 57.8583, 'train_samples_per_second': 3.889, 'train_steps_per_second': 0.519, 'train_loss': 0.44696203867594403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 166.68it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 19009.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 485.28 examples/s]
Map: 100%|██████████| 76/76 [00:01<00:00, 68.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5888667702674866, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0961, 'eval_samples_per_second': 10.408, 'eval_steps_per_second': 10.408, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.66s/it]

{'eval_loss': 0.4318135678768158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.903, 'eval_steps_per_second': 11.903, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.66s/it]

{'eval_loss': 0.39554277062416077, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1265, 'eval_samples_per_second': 7.905, 'eval_steps_per_second': 7.905, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.2735, 'train_samples_per_second': 3.913, 'train_steps_per_second': 0.515, 'train_loss': 0.44667784372965497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
100%|██████████| 1/1 [00:00<00:00, 141.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 15063.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 76/76 [00:01<00:00, 68.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5888667702674866, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.09, 'eval_samples_per_second': 11.113, 'eval_steps_per_second': 11.113, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.69s/it]

{'eval_loss': 0.4318135678768158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.49, 'eval_steps_per_second': 11.49, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.66s/it]

{'eval_loss': 0.39554277062416077, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1244, 'eval_samples_per_second': 8.038, 'eval_steps_per_second': 8.038, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.2816, 'train_samples_per_second': 3.912, 'train_steps_per_second': 0.515, 'train_loss': 0.44667784372965497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.26it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.71it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 19005.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 76/76 [00:01<00:00, 69.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5888667702674866, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1175, 'eval_samples_per_second': 8.513, 'eval_steps_per_second': 8.513, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.68s/it]

{'eval_loss': 0.4318135678768158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.082, 'eval_samples_per_second': 12.2, 'eval_steps_per_second': 12.2, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.67s/it]

{'eval_loss': 0.39554277062416077, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.917, 'eval_steps_per_second': 12.917, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.95s/it]


{'train_runtime': 58.5377, 'train_samples_per_second': 3.895, 'train_steps_per_second': 0.512, 'train_loss': 0.44667784372965497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.70it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.31it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 15193.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.52 examples/s]
Map: 100%|██████████| 76/76 [00:01<00:00, 69.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5888667702674866, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1575, 'eval_samples_per_second': 6.35, 'eval_steps_per_second': 6.35, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.66s/it]

{'eval_loss': 0.4318135678768158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.783, 'eval_steps_per_second': 11.783, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.67s/it]

{'eval_loss': 0.39554277062416077, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.676, 'eval_steps_per_second': 11.676, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.95s/it]


{'train_runtime': 58.3624, 'train_samples_per_second': 3.907, 'train_steps_per_second': 0.514, 'train_loss': 0.44667784372965497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
100%|██████████| 1/1 [00:00<00:00, 141.75it/s]
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
Casting the dataset: 100%|██████████| 76/76 [00:00<00:00, 19018.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 76/76 [00:01<00:00, 68.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5888667702674866, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0789, 'eval_samples_per_second': 12.672, 'eval_steps_per_second': 12.672, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:16,  1.67s/it]

{'eval_loss': 0.4318135678768158, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1077, 'eval_samples_per_second': 9.283, 'eval_steps_per_second': 9.283, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:56<00:00,  1.68s/it]

{'eval_loss': 0.39554277062416077, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1084, 'eval_samples_per_second': 9.227, 'eval_steps_per_second': 9.227, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.94s/it]


{'train_runtime': 58.2422, 'train_samples_per_second': 3.915, 'train_steps_per_second': 0.515, 'train_loss': 0.44667784372965497, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.52it/s]
100%|██████████| 1/1 [00:00<00:00, 123.78it/s]
100%|██████████| 1/1 [00:00<00:00, 141.64it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 15261.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.50 examples/s]
Map: 100%|██████████| 77/77 [00:01<00:00, 69.61 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5888705849647522, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.277, 'eval_steps_per_second': 12.277, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.73s/it]

{'eval_loss': 0.4316439926624298, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.223, 'eval_steps_per_second': 12.223, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.72s/it]

{'eval_loss': 0.39554983377456665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.638, 'eval_steps_per_second': 11.638, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.7178, 'train_samples_per_second': 3.934, 'train_steps_per_second': 0.511, 'train_loss': 0.4464464505513509, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.61it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 18995.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 77/77 [00:01<00:00, 69.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.5888705849647522, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.73s/it]

{'eval_loss': 0.4316439926624298, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.144, 'eval_steps_per_second': 12.144, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.73s/it]

{'eval_loss': 0.39554983377456665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.504, 'eval_steps_per_second': 11.504, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.95s/it]


{'train_runtime': 58.6467, 'train_samples_per_second': 3.939, 'train_steps_per_second': 0.512, 'train_loss': 0.4464464505513509, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 29.09it/s]
100%|██████████| 1/1 [00:00<00:00, 165.01it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 19250.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1004.14 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 77/77 [00:01<00:00, 67.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5888705849647522, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.794, 'eval_steps_per_second': 11.794, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.75s/it]

{'eval_loss': 0.4316439926624298, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.845, 'eval_steps_per_second': 12.845, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.74s/it]

{'eval_loss': 0.39554983377456665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.997, 'eval_steps_per_second': 11.997, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.97s/it]


{'train_runtime': 59.0743, 'train_samples_per_second': 3.91, 'train_steps_per_second': 0.508, 'train_loss': 0.4464464505513509, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.60it/s]
100%|██████████| 1/1 [00:00<00:00, 164.82it/s]
100%|██████████| 1/1 [00:00<00:00, 164.48it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 19053.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 77/77 [00:01<00:00, 68.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5888705849647522, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0848, 'eval_samples_per_second': 11.795, 'eval_steps_per_second': 11.795, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.73s/it]

{'eval_loss': 0.4316439926624298, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.213, 'eval_steps_per_second': 12.213, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.73s/it]

{'eval_loss': 0.39554983377456665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.057, 'eval_steps_per_second': 12.057, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.97s/it]


{'train_runtime': 59.1025, 'train_samples_per_second': 3.908, 'train_steps_per_second': 0.508, 'train_loss': 0.4464464505513509, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 123.99it/s]
100%|██████████| 1/1 [00:00<00:00, 164.81it/s]
100%|██████████| 1/1 [00:00<00:00, 141.21it/s]
Casting the dataset: 100%|██████████| 77/77 [00:00<00:00, 19247.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 77/77 [00:01<00:00, 69.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5888705849647522, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.73s/it]

{'eval_loss': 0.4316439926624298, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.063, 'eval_steps_per_second': 12.063, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.73s/it]

{'eval_loss': 0.39554983377456665, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.365, 'eval_steps_per_second': 12.365, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.96s/it]


{'train_runtime': 58.6722, 'train_samples_per_second': 3.937, 'train_steps_per_second': 0.511, 'train_loss': 0.4464464505513509, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 141.10it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 19265.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 860.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Map: 100%|██████████| 78/78 [00:01<00:00, 67.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.53 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5884500741958618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.018, 'eval_steps_per_second': 13.018, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:37<00:17,  1.78s/it]

{'eval_loss': 0.4315546154975891, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0773, 'eval_samples_per_second': 12.938, 'eval_steps_per_second': 12.938, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.78s/it]

{'eval_loss': 0.3955293595790863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.784, 'eval_steps_per_second': 11.784, 'epoch': 3.0}


100%|██████████| 30/30 [00:58<00:00,  1.97s/it]


{'train_runtime': 58.9826, 'train_samples_per_second': 3.967, 'train_steps_per_second': 0.509, 'train_loss': 0.4463192939758301, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
100%|██████████| 1/1 [00:00<00:00, 165.15it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 19277.34 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 78/78 [00:01<00:00, 66.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.94 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 196.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5884500741958618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.119, 'eval_steps_per_second': 11.119, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.78s/it]

{'eval_loss': 0.4315546154975891, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.02, 'eval_steps_per_second': 13.02, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:57<00:00,  1.77s/it]

{'eval_loss': 0.3955293595790863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.021, 'eval_steps_per_second': 13.021, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3311, 'train_samples_per_second': 3.944, 'train_steps_per_second': 0.506, 'train_loss': 0.4463192939758301, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.36it/s]
100%|██████████| 1/1 [00:00<00:00, 164.95it/s]
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 15489.59 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 78/78 [00:01<00:00, 68.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5884500741958618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.711, 'eval_steps_per_second': 11.711, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.77s/it]

{'eval_loss': 0.4315546154975891, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.82s/it]

{'eval_loss': 0.3955293595790863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.017, 'eval_steps_per_second': 13.017, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.4614, 'train_samples_per_second': 3.935, 'train_steps_per_second': 0.505, 'train_loss': 0.4463192939758301, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.74it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 19500.25 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 78/78 [00:01<00:00, 67.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.45 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.5884500741958618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.833, 'eval_steps_per_second': 12.833, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:17,  1.78s/it]

{'eval_loss': 0.4315546154975891, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.219, 'eval_steps_per_second': 12.219, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.77s/it]

{'eval_loss': 0.3955293595790863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.024, 'eval_steps_per_second': 13.024, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.98s/it]


{'train_runtime': 59.3487, 'train_samples_per_second': 3.943, 'train_steps_per_second': 0.505, 'train_loss': 0.4463192939758301, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.49it/s]
100%|██████████| 1/1 [00:00<00:00, 148.18it/s]
100%|██████████| 1/1 [00:00<00:00, 141.41it/s]
Casting the dataset: 100%|██████████| 78/78 [00:00<00:00, 15601.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 78/78 [00:01<00:00, 68.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5884500741958618, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.108, 'eval_steps_per_second': 13.108, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.82s/it]

{'eval_loss': 0.4315546154975891, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.80s/it]

{'eval_loss': 0.3955293595790863, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.028, 'eval_steps_per_second': 12.028, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.2676, 'train_samples_per_second': 3.883, 'train_steps_per_second': 0.498, 'train_loss': 0.4463192939758301, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.56it/s]
100%|██████████| 1/1 [00:00<00:00, 164.89it/s]
100%|██████████| 1/1 [00:00<00:00, 141.06it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 15794.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 79/79 [00:01<00:00, 67.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5883163809776306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.302, 'eval_steps_per_second': 12.302, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.82s/it]

{'eval_loss': 0.4315393567085266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.024, 'eval_steps_per_second': 13.024, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.82s/it]

{'eval_loss': 0.3954751193523407, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0748, 'eval_samples_per_second': 13.369, 'eval_steps_per_second': 13.369, 'epoch': 3.0}


100%|██████████| 30/30 [00:59<00:00,  1.99s/it]


{'train_runtime': 59.6118, 'train_samples_per_second': 3.976, 'train_steps_per_second': 0.503, 'train_loss': 0.4462815602620443, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.85it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 19498.06 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 79/79 [00:01<00:00, 66.56 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5883163809776306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.82s/it]

{'eval_loss': 0.4315393567085266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0769, 'eval_samples_per_second': 13.004, 'eval_steps_per_second': 13.004, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.86s/it]

{'eval_loss': 0.3954751193523407, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.772, 'eval_steps_per_second': 11.772, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.00s/it]


{'train_runtime': 60.0959, 'train_samples_per_second': 3.944, 'train_steps_per_second': 0.499, 'train_loss': 0.4462815602620443, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.54it/s]
100%|██████████| 1/1 [00:00<00:00, 164.92it/s]
100%|██████████| 1/1 [00:00<00:00, 141.76it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 19754.96 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 487.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 79/79 [00:01<00:00, 67.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 218.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5883163809776306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.211, 'eval_steps_per_second': 12.211, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.84s/it]

{'eval_loss': 0.4315393567085266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.265, 'eval_steps_per_second': 12.265, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.83s/it]

{'eval_loss': 0.3954751193523407, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.186, 'eval_steps_per_second': 13.186, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.2052, 'train_samples_per_second': 3.937, 'train_steps_per_second': 0.498, 'train_loss': 0.4462815602620443, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 166.73it/s]
100%|██████████| 1/1 [00:00<00:00, 141.30it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 15682.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 940.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.44 examples/s]
Map: 100%|██████████| 79/79 [00:01<00:00, 66.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.29 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.5883163809776306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.848, 'eval_steps_per_second': 11.848, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.87s/it]

{'eval_loss': 0.4315393567085266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.84s/it]

{'eval_loss': 0.3954751193523407, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0865, 'eval_samples_per_second': 11.554, 'eval_steps_per_second': 11.554, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.5146, 'train_samples_per_second': 3.916, 'train_steps_per_second': 0.496, 'train_loss': 0.4462815602620443, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.59it/s]
100%|██████████| 1/1 [00:00<00:00, 141.43it/s]
Casting the dataset: 100%|██████████| 79/79 [00:00<00:00, 15809.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 953.25 examples/s]
Map: 100%|██████████| 79/79 [00:01<00:00, 66.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.67 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5883163809776306, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.282, 'eval_steps_per_second': 12.282, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.88s/it]

{'eval_loss': 0.4315393567085266, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.214, 'eval_steps_per_second': 12.214, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.87s/it]

{'eval_loss': 0.3954751193523407, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.363, 'eval_steps_per_second': 12.363, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.03s/it]


{'train_runtime': 60.9671, 'train_samples_per_second': 3.887, 'train_steps_per_second': 0.492, 'train_loss': 0.4462815602620443, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
100%|██████████| 1/1 [00:00<00:00, 141.84it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 19762.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 80/80 [00:01<00:00, 64.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.5881294012069702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0895, 'eval_samples_per_second': 11.179, 'eval_steps_per_second': 11.179, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:19,  1.90s/it]

{'eval_loss': 0.4314801096916199, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.713, 'eval_steps_per_second': 11.713, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.85s/it]

{'eval_loss': 0.3953205347061157, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.151, 'eval_steps_per_second': 12.151, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.01s/it]


{'train_runtime': 60.3918, 'train_samples_per_second': 3.974, 'train_steps_per_second': 0.497, 'train_loss': 0.44629138310750327, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 164.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.15it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 20001.45 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 80/80 [00:01<00:00, 67.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5881294012069702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0859, 'eval_samples_per_second': 11.639, 'eval_steps_per_second': 11.639, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:18,  1.89s/it]

{'eval_loss': 0.4314801096916199, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.78, 'eval_steps_per_second': 11.78, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.87s/it]

{'eval_loss': 0.3953205347061157, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0758, 'eval_samples_per_second': 13.186, 'eval_steps_per_second': 13.186, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.6162, 'train_samples_per_second': 3.959, 'train_steps_per_second': 0.495, 'train_loss': 0.44629138310750327, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 164.78it/s]
100%|██████████| 1/1 [00:00<00:00, 141.45it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 15879.24 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 80/80 [00:01<00:00, 68.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.42 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.43 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5881294012069702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.361, 'eval_steps_per_second': 12.361, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:39<00:19,  1.92s/it]

{'eval_loss': 0.4314801096916199, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.708, 'eval_steps_per_second': 11.708, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.88s/it]

{'eval_loss': 0.3953205347061157, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.224, 'eval_steps_per_second': 12.224, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.6457, 'train_samples_per_second': 3.957, 'train_steps_per_second': 0.495, 'train_loss': 0.44629138310750327, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
100%|██████████| 1/1 [00:00<00:00, 165.22it/s]
100%|██████████| 1/1 [00:00<00:00, 141.39it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 15827.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 950.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 80/80 [00:01<00:00, 67.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5881294012069702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0838, 'eval_samples_per_second': 11.931, 'eval_steps_per_second': 11.931, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.89s/it]

{'eval_loss': 0.4314801096916199, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.907, 'eval_steps_per_second': 11.907, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:58<00:00,  1.89s/it]

{'eval_loss': 0.3953205347061157, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.51, 'eval_steps_per_second': 11.51, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.00s/it]


{'train_runtime': 60.0337, 'train_samples_per_second': 3.998, 'train_steps_per_second': 0.5, 'train_loss': 0.44629138310750327, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 165.05it/s]
100%|██████████| 1/1 [00:00<00:00, 141.62it/s]
100%|██████████| 1/1 [00:00<00:00, 141.04it/s]
Casting the dataset: 100%|██████████| 80/80 [00:00<00:00, 15860.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.20 examples/s]
Map: 100%|██████████| 80/80 [00:01<00:00, 65.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 200.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.79 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.5881294012069702, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.69, 'eval_steps_per_second': 12.69, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 20/30 [00:38<00:18,  1.87s/it]

{'eval_loss': 0.4314801096916199, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.506, 'eval_steps_per_second': 11.506, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 30/30 [00:59<00:00,  1.90s/it]

{'eval_loss': 0.3953205347061157, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.154, 'eval_steps_per_second': 12.154, 'epoch': 3.0}


100%|██████████| 30/30 [01:00<00:00,  2.02s/it]


{'train_runtime': 60.5592, 'train_samples_per_second': 3.963, 'train_steps_per_second': 0.495, 'train_loss': 0.44629138310750327, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 141.67it/s]
100%|██████████| 1/1 [00:00<00:00, 141.29it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 16062.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 81/81 [00:01<00:00, 63.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.5611379742622375, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.21, 'eval_steps_per_second': 12.21, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:16,  1.53s/it]

{'eval_loss': 0.40498480200767517, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0768, 'eval_samples_per_second': 13.015, 'eval_steps_per_second': 13.015, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.54s/it]

{'eval_loss': 0.37855589389801025, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.078, 'eval_samples_per_second': 12.819, 'eval_steps_per_second': 12.819, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.89s/it]


{'train_runtime': 62.4422, 'train_samples_per_second': 3.892, 'train_steps_per_second': 0.528, 'train_loss': 0.40465944463556464, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.44it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 141.60it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 19959.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Map: 100%|██████████| 81/81 [00:01<00:00, 65.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 197.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4812747836112976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0849, 'eval_samples_per_second': 11.782, 'eval_steps_per_second': 11.782, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:16,  1.52s/it]

{'eval_loss': 0.3502310812473297, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0909, 'eval_samples_per_second': 11.003, 'eval_steps_per_second': 11.003, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.55s/it]

{'eval_loss': 0.3308861255645752, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0864, 'eval_samples_per_second': 11.572, 'eval_steps_per_second': 11.572, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.91s/it]


{'train_runtime': 62.9862, 'train_samples_per_second': 3.858, 'train_steps_per_second': 0.524, 'train_loss': 0.40288968519731, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.34it/s]
100%|██████████| 1/1 [00:00<00:00, 82.46it/s]
100%|██████████| 1/1 [00:00<00:00, 76.55it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 20249.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 81/81 [00:01<00:00, 65.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 313.69 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 239.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4812747836112976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.694, 'eval_steps_per_second': 12.694, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:16,  1.52s/it]

{'eval_loss': 0.3502310812473297, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0779, 'eval_samples_per_second': 12.843, 'eval_steps_per_second': 12.843, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:00<00:00,  1.51s/it]

{'eval_loss': 0.3308861255645752, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.505, 'eval_steps_per_second': 11.505, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.89s/it]


{'train_runtime': 62.2995, 'train_samples_per_second': 3.901, 'train_steps_per_second': 0.53, 'train_loss': 0.40288968519731, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.42it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.57it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 16200.40 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 81/81 [00:01<00:00, 63.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 487.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4812747836112976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.76, 'eval_steps_per_second': 11.76, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:16,  1.52s/it]

{'eval_loss': 0.3502310812473297, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.368, 'eval_steps_per_second': 12.368, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:00<00:00,  1.53s/it]

{'eval_loss': 0.3308861255645752, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0818, 'eval_samples_per_second': 12.22, 'eval_steps_per_second': 12.22, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.89s/it]


{'train_runtime': 62.3451, 'train_samples_per_second': 3.898, 'train_steps_per_second': 0.529, 'train_loss': 0.40288968519731, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.51it/s]
100%|██████████| 1/1 [00:00<00:00, 165.07it/s]
100%|██████████| 1/1 [00:00<00:00, 164.25it/s]
Casting the dataset: 100%|██████████| 81/81 [00:00<00:00, 16093.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 951.74 examples/s]
Map: 100%|██████████| 81/81 [00:01<00:00, 66.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 216.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 199.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4812747836112976, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.058, 'eval_steps_per_second': 12.058, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:16,  1.53s/it]

{'eval_loss': 0.3502310812473297, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0869, 'eval_samples_per_second': 11.511, 'eval_steps_per_second': 11.511, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.53s/it]

{'eval_loss': 0.3308861255645752, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.068, 'eval_steps_per_second': 12.068, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.91s/it]


{'train_runtime': 62.9537, 'train_samples_per_second': 3.86, 'train_steps_per_second': 0.524, 'train_loss': 0.40288968519731, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 141.79it/s]
100%|██████████| 1/1 [00:00<00:00, 162.61it/s]
100%|██████████| 1/1 [00:00<00:00, 141.13it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 20248.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 82/82 [00:01<00:00, 64.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4802352786064148, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0819, 'eval_samples_per_second': 12.212, 'eval_steps_per_second': 12.212, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:17,  1.59s/it]

{'eval_loss': 0.3504232168197632, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.084, 'eval_samples_per_second': 11.907, 'eval_steps_per_second': 11.907, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.55s/it]

{'eval_loss': 0.33077070116996765, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0829, 'eval_samples_per_second': 12.061, 'eval_steps_per_second': 12.061, 'epoch': 3.0}


100%|██████████| 33/33 [01:02<00:00,  1.91s/it]


{'train_runtime': 62.9264, 'train_samples_per_second': 3.909, 'train_steps_per_second': 0.524, 'train_loss': 0.4023123654452237, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 164.88it/s]
100%|██████████| 1/1 [00:00<00:00, 164.57it/s]
100%|██████████| 1/1 [00:00<00:00, 141.63it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 16287.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Map: 100%|██████████| 82/82 [00:01<00:00, 63.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 326.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 327.37 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4802352786064148, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0788, 'eval_samples_per_second': 12.692, 'eval_steps_per_second': 12.692, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:40<00:17,  1.57s/it]

{'eval_loss': 0.3504232168197632, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.357, 'eval_steps_per_second': 12.357, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:01<00:00,  1.61s/it]

{'eval_loss': 0.33077070116996765, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.592, 'eval_steps_per_second': 11.592, 'epoch': 3.0}


100%|██████████| 33/33 [01:03<00:00,  1.91s/it]


{'train_runtime': 63.1869, 'train_samples_per_second': 3.893, 'train_steps_per_second': 0.522, 'train_loss': 0.4023123654452237, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.57it/s]
100%|██████████| 1/1 [00:00<00:00, 143.54it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 5247.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 82/82 [00:01<00:00, 60.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 72.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  _war

{'eval_loss': 0.4802352786064148, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1088, 'eval_samples_per_second': 9.194, 'eval_steps_per_second': 9.194, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:44<00:18,  1.70s/it]

{'eval_loss': 0.3504232168197632, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0808, 'eval_samples_per_second': 12.372, 'eval_steps_per_second': 12.372, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.65s/it]

{'eval_loss': 0.33077070116996765, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.99, 'eval_steps_per_second': 11.99, 'epoch': 3.0}


100%|██████████| 33/33 [01:08<00:00,  2.07s/it]


{'train_runtime': 68.2835, 'train_samples_per_second': 3.603, 'train_steps_per_second': 0.483, 'train_loss': 0.4023123654452237, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 117.41it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 140.22it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 16404.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Map: 100%|██████████| 82/82 [00:01<00:00, 64.59 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.78 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 241.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4802352786064148, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1251, 'eval_samples_per_second': 7.996, 'eval_steps_per_second': 7.996, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:17,  1.60s/it]

{'eval_loss': 0.3504232168197632, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.305, 'eval_steps_per_second': 12.305, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.66s/it]

{'eval_loss': 0.33077070116996765, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.976, 'eval_steps_per_second': 11.976, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.96s/it]


{'train_runtime': 64.827, 'train_samples_per_second': 3.795, 'train_steps_per_second': 0.509, 'train_loss': 0.4023123654452237, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 141.14it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 82/82 [00:00<00:00, 20502.71 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 82/82 [00:01<00:00, 64.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.91 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4802352786064148, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0996, 'eval_samples_per_second': 10.043, 'eval_steps_per_second': 10.043, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.64s/it]

{'eval_loss': 0.3504232168197632, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.643, 'eval_steps_per_second': 12.643, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.62s/it]

{'eval_loss': 0.33077070116996765, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.254, 'eval_steps_per_second': 12.254, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.2531, 'train_samples_per_second': 3.77, 'train_steps_per_second': 0.506, 'train_loss': 0.4023123654452237, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
100%|██████████| 1/1 [00:00<00:00, 153.81it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 18401.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.13 examples/s]
Map: 100%|██████████| 83/83 [00:01<00:00, 64.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 400.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4798680543899536, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.69s/it]

{'eval_loss': 0.34996554255485535, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1377, 'eval_samples_per_second': 7.262, 'eval_steps_per_second': 7.262, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.68s/it]

{'eval_loss': 0.33035042881965637, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.033, 'eval_steps_per_second': 12.033, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.5221, 'train_samples_per_second': 3.8, 'train_steps_per_second': 0.504, 'train_loss': 0.40209013043027936, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20749.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 83/83 [00:01<00:00, 64.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4798680543899536, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.874, 'eval_steps_per_second': 11.874, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.68s/it]

{'eval_loss': 0.34996554255485535, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1236, 'eval_samples_per_second': 8.088, 'eval_steps_per_second': 8.088, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.67s/it]

{'eval_loss': 0.33035042881965637, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.97s/it]


{'train_runtime': 65.0223, 'train_samples_per_second': 3.829, 'train_steps_per_second': 0.508, 'train_loss': 0.40209013043027936, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.04it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.67it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20762.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Map: 100%|██████████| 83/83 [00:01<00:00, 63.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4798680543899536, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0896, 'eval_samples_per_second': 11.161, 'eval_steps_per_second': 11.161, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.66s/it]

{'eval_loss': 0.34996554255485535, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.107, 'eval_steps_per_second': 12.107, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.72s/it]

{'eval_loss': 0.33035042881965637, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.2436, 'train_samples_per_second': 3.816, 'train_steps_per_second': 0.506, 'train_loss': 0.40209013043027936, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.08it/s]
100%|██████████| 1/1 [00:00<00:00, 142.62it/s]
100%|██████████| 1/1 [00:00<00:00, 133.01it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 16612.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 83/83 [00:01<00:00, 64.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.4798680543899536, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.228, 'eval_steps_per_second': 11.228, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.66s/it]

{'eval_loss': 0.34996554255485535, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.6, 'eval_steps_per_second': 11.6, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.66s/it]

{'eval_loss': 0.33035042881965637, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.546, 'eval_steps_per_second': 11.546, 'epoch': 3.0}


100%|██████████| 33/33 [01:04<00:00,  1.96s/it]


{'train_runtime': 64.8386, 'train_samples_per_second': 3.84, 'train_steps_per_second': 0.509, 'train_loss': 0.40209013043027936, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.92it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
Casting the dataset: 100%|██████████| 83/83 [00:00<00:00, 20757.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 83/83 [00:01<00:00, 65.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 399.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task

{'eval_loss': 0.4798680543899536, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0941, 'eval_samples_per_second': 10.622, 'eval_steps_per_second': 10.622, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:41<00:18,  1.67s/it]

{'eval_loss': 0.34996554255485535, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.031, 'eval_steps_per_second': 12.031, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.70s/it]

{'eval_loss': 0.33035042881965637, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1332, 'eval_samples_per_second': 7.509, 'eval_steps_per_second': 7.509, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.97s/it]


{'train_runtime': 65.1048, 'train_samples_per_second': 3.825, 'train_steps_per_second': 0.507, 'train_loss': 0.40209013043027936, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 133.01it/s]
100%|██████████| 1/1 [00:00<00:00, 142.43it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 20992.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.02 examples/s]
Map: 100%|██████████| 84/84 [00:01<00:00, 62.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.47999054193496704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1147, 'eval_samples_per_second': 8.72, 'eval_steps_per_second': 8.72, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:19,  1.75s/it]

{'eval_loss': 0.34993427991867065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1226, 'eval_samples_per_second': 8.158, 'eval_steps_per_second': 8.158, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.73s/it]

{'eval_loss': 0.3302500545978546, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0976, 'eval_samples_per_second': 10.242, 'eval_steps_per_second': 10.242, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.01s/it]


{'train_runtime': 66.2967, 'train_samples_per_second': 3.801, 'train_steps_per_second': 0.498, 'train_loss': 0.40188838496352686, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.68it/s]
100%|██████████| 1/1 [00:00<00:00, 166.54it/s]
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 16802.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 84/84 [00:01<00:00, 64.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.47999054193496704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1302, 'eval_samples_per_second': 7.683, 'eval_steps_per_second': 7.683, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:18,  1.72s/it]

{'eval_loss': 0.34993427991867065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.318, 'eval_steps_per_second': 12.318, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.71s/it]

{'eval_loss': 0.3302500545978546, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0997, 'eval_samples_per_second': 10.032, 'eval_steps_per_second': 10.032, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.00s/it]


{'train_runtime': 66.1236, 'train_samples_per_second': 3.811, 'train_steps_per_second': 0.499, 'train_loss': 0.40188838496352686, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 20895.65 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 84/84 [00:01<00:00, 64.71 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 439.01 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.47999054193496704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1126, 'eval_samples_per_second': 8.884, 'eval_steps_per_second': 8.884, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.76s/it]

{'eval_loss': 0.34993427991867065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.119, 'eval_steps_per_second': 12.119, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.70s/it]

{'eval_loss': 0.3302500545978546, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.427, 'eval_steps_per_second': 11.427, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.99s/it]


{'train_runtime': 65.7356, 'train_samples_per_second': 3.834, 'train_steps_per_second': 0.502, 'train_loss': 0.40188838496352686, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 20961.54 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Map: 100%|██████████| 84/84 [00:01<00:00, 63.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.47999054193496704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1355, 'eval_samples_per_second': 7.379, 'eval_steps_per_second': 7.379, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.74s/it]

{'eval_loss': 0.34993427991867065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0955, 'eval_samples_per_second': 10.47, 'eval_steps_per_second': 10.47, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.73s/it]

{'eval_loss': 0.3302500545978546, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1365, 'eval_samples_per_second': 7.325, 'eval_steps_per_second': 7.325, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  2.00s/it]


{'train_runtime': 65.9314, 'train_samples_per_second': 3.822, 'train_steps_per_second': 0.501, 'train_loss': 0.40188838496352686, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 142.80it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 84/84 [00:00<00:00, 16799.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 84/84 [00:01<00:00, 65.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be abl

{'eval_loss': 0.47999054193496704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1095, 'eval_samples_per_second': 9.131, 'eval_steps_per_second': 9.131, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:18,  1.71s/it]

{'eval_loss': 0.34993427991867065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0855, 'eval_samples_per_second': 11.694, 'eval_steps_per_second': 11.694, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:03<00:00,  1.72s/it]

{'eval_loss': 0.3302500545978546, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1085, 'eval_samples_per_second': 9.215, 'eval_steps_per_second': 9.215, 'epoch': 3.0}


100%|██████████| 33/33 [01:05<00:00,  1.98s/it]


{'train_runtime': 65.2377, 'train_samples_per_second': 3.863, 'train_steps_per_second': 0.506, 'train_loss': 0.40188838496352686, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.99it/s]
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 21238.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 85/85 [00:01<00:00, 62.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4797893762588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0857, 'eval_samples_per_second': 11.669, 'eval_steps_per_second': 11.669, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:42<00:19,  1.78s/it]

{'eval_loss': 0.3498690128326416, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.176, 'eval_steps_per_second': 12.176, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:04<00:00,  1.81s/it]

{'eval_loss': 0.33022621273994446, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.639, 'eval_steps_per_second': 12.639, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.00s/it]


{'train_runtime': 66.1172, 'train_samples_per_second': 3.857, 'train_steps_per_second': 0.499, 'train_loss': 0.4019193071307558, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.12it/s]
100%|██████████| 1/1 [00:00<00:00, 124.83it/s]
100%|██████████| 1/1 [00:00<00:00, 142.66it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 17002.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.39 examples/s]
Map: 100%|██████████| 85/85 [00:01<00:00, 58.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 308.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4797893762588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.18, 'eval_steps_per_second': 12.18, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.86s/it]

{'eval_loss': 0.3498690128326416, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.679, 'eval_steps_per_second': 11.679, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.87s/it]

{'eval_loss': 0.33022621273994446, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0903, 'eval_samples_per_second': 11.071, 'eval_steps_per_second': 11.071, 'epoch': 3.0}


100%|██████████| 33/33 [01:08<00:00,  2.06s/it]


{'train_runtime': 68.0562, 'train_samples_per_second': 3.747, 'train_steps_per_second': 0.485, 'train_loss': 0.4019193071307558, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 117.42it/s]
100%|██████████| 1/1 [00:00<00:00, 133.20it/s]
100%|██████████| 1/1 [00:00<00:00, 163.97it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 24234.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Map: 100%|██████████| 85/85 [00:01<00:00, 58.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 308.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4797893762588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.818, 'eval_steps_per_second': 11.818, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:44<00:20,  1.84s/it]

{'eval_loss': 0.3498690128326416, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.677, 'eval_steps_per_second': 11.677, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.83s/it]

{'eval_loss': 0.33022621273994446, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.547, 'eval_steps_per_second': 11.547, 'epoch': 3.0}


100%|██████████| 33/33 [01:08<00:00,  2.06s/it]


{'train_runtime': 68.1143, 'train_samples_per_second': 3.744, 'train_steps_per_second': 0.484, 'train_loss': 0.4019193071307558, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.05it/s]
100%|██████████| 1/1 [00:00<00:00, 153.50it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 16985.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 85/85 [00:01<00:00, 59.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4797893762588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.033, 'eval_steps_per_second': 12.033, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:44<00:20,  1.86s/it]

{'eval_loss': 0.3498690128326416, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.181, 'eval_steps_per_second': 12.181, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.82s/it]

{'eval_loss': 0.33022621273994446, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.72, 'eval_steps_per_second': 12.72, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.06s/it]


{'train_runtime': 67.9809, 'train_samples_per_second': 3.751, 'train_steps_per_second': 0.485, 'train_loss': 0.4019193071307558, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.59it/s]
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 153.48it/s]
Casting the dataset: 100%|██████████| 85/85 [00:00<00:00, 16997.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.81 examples/s]
Map: 100%|██████████| 85/85 [00:01<00:00, 61.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.47 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4797893762588501, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0891, 'eval_samples_per_second': 11.221, 'eval_steps_per_second': 11.221, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:19,  1.80s/it]

{'eval_loss': 0.3498690128326416, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.802, 'eval_steps_per_second': 12.802, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.82s/it]

{'eval_loss': 0.33022621273994446, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.106, 'eval_steps_per_second': 12.106, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.03s/it]


{'train_runtime': 66.9842, 'train_samples_per_second': 3.807, 'train_steps_per_second': 0.493, 'train_loss': 0.4019193071307558, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
100%|██████████| 1/1 [00:00<00:00, 166.45it/s]
100%|██████████| 1/1 [00:00<00:00, 133.12it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 21511.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 86/86 [00:01<00:00, 61.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4798485338687897, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.68, 'eval_steps_per_second': 11.68, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.82s/it]

{'eval_loss': 0.34988123178482056, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.819, 'eval_steps_per_second': 11.819, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.85s/it]

{'eval_loss': 0.33024507761001587, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.888, 'eval_steps_per_second': 12.888, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.03s/it]


{'train_runtime': 67.0623, 'train_samples_per_second': 3.847, 'train_steps_per_second': 0.492, 'train_loss': 0.4018297484426787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
100%|██████████| 1/1 [00:00<00:00, 153.72it/s]
100%|██████████| 1/1 [00:00<00:00, 133.09it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 17158.70 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 86/86 [00:01<00:00, 58.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.87 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4798485338687897, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.055, 'eval_steps_per_second': 12.055, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.85s/it]

{'eval_loss': 0.34988123178482056, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0951, 'eval_samples_per_second': 10.513, 'eval_steps_per_second': 10.513, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.89s/it]

{'eval_loss': 0.33024507761001587, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.102, 'eval_steps_per_second': 12.102, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.04s/it]


{'train_runtime': 67.4383, 'train_samples_per_second': 3.826, 'train_steps_per_second': 0.489, 'train_loss': 0.4018297484426787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
100%|██████████| 1/1 [00:00<00:00, 117.46it/s]
100%|██████████| 1/1 [00:00<00:00, 166.60it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 19082.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 720.67 examples/s]
Map: 100%|██████████| 86/86 [00:01<00:00, 58.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4798485338687897, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.25, 'eval_steps_per_second': 12.25, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.85s/it]

{'eval_loss': 0.34988123178482056, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.283, 'eval_steps_per_second': 11.283, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.89s/it]

{'eval_loss': 0.33024507761001587, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.607, 'eval_steps_per_second': 11.607, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.05s/it]


{'train_runtime': 67.6668, 'train_samples_per_second': 3.813, 'train_steps_per_second': 0.488, 'train_loss': 0.4018297484426787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.17it/s]
100%|██████████| 1/1 [00:00<00:00, 133.13it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 19049.92 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 86/86 [00:01<00:00, 56.80 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able 

{'eval_loss': 0.4798485338687897, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.108, 'eval_steps_per_second': 12.108, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.83s/it]

{'eval_loss': 0.34988123178482056, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.72, 'eval_steps_per_second': 12.72, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.83s/it]

{'eval_loss': 0.33024507761001587, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.03s/it]


{'train_runtime': 66.8968, 'train_samples_per_second': 3.857, 'train_steps_per_second': 0.493, 'train_loss': 0.4018297484426787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.55it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 153.49it/s]
Casting the dataset: 100%|██████████| 86/86 [00:00<00:00, 19201.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 994.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Map: 100%|██████████| 86/86 [00:01<00:00, 62.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4798485338687897, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.889, 'eval_steps_per_second': 11.889, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.83s/it]

{'eval_loss': 0.34988123178482056, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.84s/it]

{'eval_loss': 0.33024507761001587, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0787, 'eval_samples_per_second': 12.703, 'eval_steps_per_second': 12.703, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.03s/it]


{'train_runtime': 66.9675, 'train_samples_per_second': 3.853, 'train_steps_per_second': 0.493, 'train_loss': 0.4018297484426787, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.49it/s]
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 21746.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 498.55 examples/s]
Map: 100%|██████████| 87/87 [00:01<00:00, 60.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4800327718257904, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.90s/it]

{'eval_loss': 0.34981271624565125, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.815, 'eval_steps_per_second': 11.815, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.89s/it]

{'eval_loss': 0.33015888929367065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.887, 'eval_steps_per_second': 12.887, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.04s/it]


{'train_runtime': 67.2705, 'train_samples_per_second': 3.88, 'train_steps_per_second': 0.491, 'train_loss': 0.4018343145197088, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 166.44it/s]
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 21739.91 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 878.57 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 993.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Map: 100%|██████████| 87/87 [00:01<00:00, 58.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4800327718257904, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.036, 'eval_steps_per_second': 12.036, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.88s/it]

{'eval_loss': 0.34981271624565125, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.095, 'eval_samples_per_second': 10.527, 'eval_steps_per_second': 10.527, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.88s/it]

{'eval_loss': 0.33015888929367065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.884, 'eval_steps_per_second': 12.884, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.03s/it]


{'train_runtime': 66.9263, 'train_samples_per_second': 3.9, 'train_steps_per_second': 0.493, 'train_loss': 0.4018343145197088, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
100%|██████████| 1/1 [00:00<00:00, 153.62it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 21742.50 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 87/87 [00:01<00:00, 58.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 241.64 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 400.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4800327718257904, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.753, 'eval_steps_per_second': 11.753, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:20,  1.86s/it]

{'eval_loss': 0.34981271624565125, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.407, 'eval_steps_per_second': 12.407, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.86s/it]

{'eval_loss': 0.33015888929367065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.612, 'eval_steps_per_second': 11.612, 'epoch': 3.0}


100%|██████████| 33/33 [01:06<00:00,  2.02s/it]


{'train_runtime': 66.6642, 'train_samples_per_second': 3.915, 'train_steps_per_second': 0.495, 'train_loss': 0.4018343145197088, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 105.07it/s]
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 124.95it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 21752.87 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.76 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Map: 100%|██████████| 87/87 [00:01<00:00, 58.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.77 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4800327718257904, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.641, 'eval_steps_per_second': 12.641, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:44<00:21,  1.92s/it]

{'eval_loss': 0.34981271624565125, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.179, 'eval_steps_per_second': 12.179, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:07<00:00,  1.93s/it]

{'eval_loss': 0.33015888929367065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.745, 'eval_steps_per_second': 11.745, 'epoch': 3.0}


100%|██████████| 33/33 [01:08<00:00,  2.08s/it]


{'train_runtime': 68.6324, 'train_samples_per_second': 3.803, 'train_steps_per_second': 0.481, 'train_loss': 0.4018343145197088, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.19it/s]
100%|██████████| 1/1 [00:00<00:00, 153.71it/s]
100%|██████████| 1/1 [00:00<00:00, 133.28it/s]
Casting the dataset: 100%|██████████| 87/87 [00:00<00:00, 19305.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 87/87 [00:01<00:00, 58.55 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4800327718257904, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0762, 'eval_samples_per_second': 13.131, 'eval_steps_per_second': 13.131, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:44<00:21,  1.93s/it]

{'eval_loss': 0.34981271624565125, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.677, 'eval_steps_per_second': 11.677, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:07<00:00,  1.93s/it]

{'eval_loss': 0.33015888929367065, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.959, 'eval_steps_per_second': 11.959, 'epoch': 3.0}


100%|██████████| 33/33 [01:08<00:00,  2.08s/it]


{'train_runtime': 68.7301, 'train_samples_per_second': 3.797, 'train_steps_per_second': 0.48, 'train_loss': 0.4018343145197088, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.01it/s]
100%|██████████| 1/1 [00:00<00:00, 133.05it/s]
100%|██████████| 1/1 [00:00<00:00, 166.39it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 22000.28 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.10 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 88/88 [00:01<00:00, 61.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4802674651145935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0777, 'eval_samples_per_second': 12.868, 'eval_steps_per_second': 12.868, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:21,  1.93s/it]

{'eval_loss': 0.3497641384601593, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.804, 'eval_steps_per_second': 12.804, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.94s/it]

{'eval_loss': 0.3300926685333252, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.032, 'eval_steps_per_second': 12.032, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.06s/it]


{'train_runtime': 67.8329, 'train_samples_per_second': 3.892, 'train_steps_per_second': 0.486, 'train_loss': 0.40186717293479224, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.69it/s]
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 133.07it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 17608.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 492.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Map: 100%|██████████| 88/88 [00:01<00:00, 61.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 247.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4802674651145935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.179, 'eval_steps_per_second': 12.179, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:21,  1.92s/it]

{'eval_loss': 0.3497641384601593, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.18, 'eval_steps_per_second': 12.18, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.94s/it]

{'eval_loss': 0.3300926685333252, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.106, 'eval_steps_per_second': 12.106, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.05s/it]


{'train_runtime': 67.6227, 'train_samples_per_second': 3.904, 'train_steps_per_second': 0.488, 'train_loss': 0.40186717293479224, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 153.54it/s]
100%|██████████| 1/1 [00:00<00:00, 133.10it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 22010.78 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 848.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Map: 100%|██████████| 88/88 [00:01<00:00, 59.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4802674651145935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.957, 'eval_steps_per_second': 11.957, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:21,  1.94s/it]

{'eval_loss': 0.3497641384601593, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.036, 'eval_steps_per_second': 12.036, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.94s/it]

{'eval_loss': 0.3300926685333252, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.886, 'eval_steps_per_second': 11.886, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.05s/it]


{'train_runtime': 67.6623, 'train_samples_per_second': 3.902, 'train_steps_per_second': 0.488, 'train_loss': 0.40186717293479224, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.56it/s]
100%|██████████| 1/1 [00:00<00:00, 153.75it/s]
100%|██████████| 1/1 [00:00<00:00, 124.90it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 22006.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.62 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 88/88 [00:01<00:00, 58.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 221.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task t

{'eval_loss': 0.4802674651145935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.254, 'eval_steps_per_second': 12.254, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:21,  1.92s/it]

{'eval_loss': 0.3497641384601593, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0867, 'eval_samples_per_second': 11.537, 'eval_steps_per_second': 11.537, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:05<00:00,  1.92s/it]

{'eval_loss': 0.3300926685333252, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.04s/it]


{'train_runtime': 67.2412, 'train_samples_per_second': 3.926, 'train_steps_per_second': 0.491, 'train_loss': 0.40186717293479224, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.57it/s]
100%|██████████| 1/1 [00:00<00:00, 153.63it/s]
100%|██████████| 1/1 [00:00<00:00, 153.51it/s]
Casting the dataset: 100%|██████████| 88/88 [00:00<00:00, 21674.72 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 798.92 examples/s]
Map: 100%|██████████| 88/88 [00:01<00:00, 61.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task 

{'eval_loss': 0.4802674651145935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.96, 'eval_steps_per_second': 11.96, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 22/33 [00:43<00:21,  1.93s/it]

{'eval_loss': 0.3497641384601593, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.417, 'eval_steps_per_second': 11.417, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 33/33 [01:06<00:00,  1.92s/it]

{'eval_loss': 0.3300926685333252, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.1325, 'eval_samples_per_second': 7.545, 'eval_steps_per_second': 7.545, 'epoch': 3.0}


100%|██████████| 33/33 [01:07<00:00,  2.04s/it]


{'train_runtime': 67.3851, 'train_samples_per_second': 3.918, 'train_steps_per_second': 0.49, 'train_loss': 0.40186717293479224, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.63it/s]
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.59it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 13669.73 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 906.88 examples/s]
Map: 100%|██████████| 89/89 [00:01<00:00, 62.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 243.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to

{'eval_loss': 0.4703824818134308, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.108, 'eval_steps_per_second': 12.108, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.56s/it]

{'eval_loss': 0.3368990123271942, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.108, 'eval_steps_per_second': 12.108, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.53s/it]

{'eval_loss': 0.3204488158226013, 'eval_precision': 0.75, 'eval_recall': 0.5, 'eval_f1': 0.6, 'eval_accuracy': 0.9444444444444444, 'eval_span_f1': 0.6, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.961, 'eval_steps_per_second': 11.961, 'epoch': 3.0}


100%|██████████| 36/36 [01:08<00:00,  1.91s/it]


{'train_runtime': 68.6958, 'train_samples_per_second': 3.887, 'train_steps_per_second': 0.524, 'train_loss': 0.36995000309414333, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
100%|██████████| 1/1 [00:00<00:00, 153.80it/s]
100%|██████████| 1/1 [00:00<00:00, 133.30it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 17808.08 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Map: 100%|██████████| 89/89 [00:01<00:00, 60.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.65 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able

{'eval_loss': 0.45686012506484985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.252, 'eval_steps_per_second': 12.252, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:18,  1.57s/it]

{'eval_loss': 0.36552172899246216, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0976, 'eval_samples_per_second': 10.241, 'eval_steps_per_second': 10.241, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.58s/it]

{'eval_loss': 0.3524056077003479, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.747, 'eval_steps_per_second': 11.747, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.6414, 'train_samples_per_second': 3.834, 'train_steps_per_second': 0.517, 'train_loss': 0.2539637088775635, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 73.99it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 117.47it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 19761.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 89/89 [00:01<00:00, 60.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.45686012506484985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.257, 'eval_steps_per_second': 12.257, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.55s/it]

{'eval_loss': 0.36552172899246216, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.96, 'eval_steps_per_second': 11.96, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.55s/it]

{'eval_loss': 0.3524056077003479, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.643, 'eval_steps_per_second': 12.643, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.0948, 'train_samples_per_second': 3.864, 'train_steps_per_second': 0.521, 'train_loss': 0.2539637088775635, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.68it/s]
100%|██████████| 1/1 [00:00<00:00, 153.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.78it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 16164.77 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1977.51 examples/s]
Map: 100%|██████████| 89/89 [00:01<00:00, 60.18 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 279.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 397.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.45686012506484985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0825, 'eval_samples_per_second': 12.116, 'eval_steps_per_second': 12.116, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.54s/it]

{'eval_loss': 0.36552172899246216, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0851, 'eval_samples_per_second': 11.751, 'eval_steps_per_second': 11.751, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.54s/it]

{'eval_loss': 0.3524056077003479, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0821, 'eval_samples_per_second': 12.182, 'eval_steps_per_second': 12.182, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.1695, 'train_samples_per_second': 3.86, 'train_steps_per_second': 0.52, 'train_loss': 0.2539637088775635, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
100%|██████████| 1/1 [00:00<00:00, 133.12it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.65it/s]
Casting the dataset: 100%|██████████| 89/89 [00:00<00:00, 14831.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 988.52 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 89/89 [00:01<00:00, 59.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.57 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.90 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.45686012506484985, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.64, 'eval_steps_per_second': 12.64, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:18,  1.56s/it]

{'eval_loss': 0.36552172899246216, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.11, 'eval_steps_per_second': 12.11, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.55s/it]

{'eval_loss': 0.3524056077003479, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.105, 'eval_steps_per_second': 12.105, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.1329, 'train_samples_per_second': 3.862, 'train_steps_per_second': 0.521, 'train_loss': 0.2539637088775635, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.10it/s]
100%|██████████| 1/1 [00:00<00:00, 153.60it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 17838.82 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.39 examples/s]
Map: 100%|██████████| 90/90 [00:01<00:00, 61.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.4566035270690918, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0852, 'eval_samples_per_second': 11.735, 'eval_steps_per_second': 11.735, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.59s/it]

{'eval_loss': 0.3658033013343811, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0881, 'eval_samples_per_second': 11.347, 'eval_steps_per_second': 11.347, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.59s/it]

{'eval_loss': 0.35258805751800537, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.482, 'eval_steps_per_second': 11.482, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.4758, 'train_samples_per_second': 3.886, 'train_steps_per_second': 0.518, 'train_loss': 0.2532244523366292, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 83.20it/s]
100%|██████████| 1/1 [00:00<00:00, 133.09it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.32it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 19995.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 664.39 examples/s]
Map: 100%|██████████| 90/90 [00:01<00:00, 61.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 398.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should proba

{'eval_loss': 0.4566035270690918, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1332, 'eval_samples_per_second': 7.51, 'eval_steps_per_second': 7.51, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.61s/it]

{'eval_loss': 0.3658033013343811, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.889, 'eval_steps_per_second': 12.889, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.61s/it]

{'eval_loss': 0.35258805751800537, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.0623, 'train_samples_per_second': 3.854, 'train_steps_per_second': 0.514, 'train_loss': 0.2532244523366292, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.77it/s]
100%|██████████| 1/1 [00:00<00:00, 142.08it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.83it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 16359.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 90/90 [00:01<00:00, 60.72 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 310.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.63 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 284.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4566035270690918, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.888, 'eval_steps_per_second': 12.888, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.62s/it]

{'eval_loss': 0.3658033013343811, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0861, 'eval_samples_per_second': 11.616, 'eval_steps_per_second': 11.616, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.60s/it]

{'eval_loss': 0.35258805751800537, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0766, 'eval_samples_per_second': 13.057, 'eval_steps_per_second': 13.057, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.2794, 'train_samples_per_second': 3.842, 'train_steps_per_second': 0.512, 'train_loss': 0.2532244523366292, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.82it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 17875.15 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 663.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.27 examples/s]
Map: 100%|██████████| 90/90 [00:01<00:00, 60.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4566035270690918, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0871, 'eval_samples_per_second': 11.477, 'eval_steps_per_second': 11.477, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.61s/it]

{'eval_loss': 0.3658033013343811, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.248, 'eval_steps_per_second': 12.248, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.61s/it]

{'eval_loss': 0.35258805751800537, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.888, 'eval_steps_per_second': 12.888, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8357, 'train_samples_per_second': 3.866, 'train_steps_per_second': 0.515, 'train_loss': 0.2532244523366292, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.49it/s]
100%|██████████| 1/1 [00:00<00:00, 133.00it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.77it/s]
Casting the dataset: 100%|██████████| 90/90 [00:00<00:00, 17988.44 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 665.02 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.98 examples/s]
Map: 100%|██████████| 90/90 [00:01<00:00, 59.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4566035270690918, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0776, 'eval_samples_per_second': 12.886, 'eval_steps_per_second': 12.886, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.60s/it]

{'eval_loss': 0.3658033013343811, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.82, 'eval_steps_per_second': 11.82, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.61s/it]

{'eval_loss': 0.35258805751800537, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0816, 'eval_samples_per_second': 12.253, 'eval_steps_per_second': 12.253, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.7648, 'train_samples_per_second': 3.87, 'train_steps_per_second': 0.516, 'train_loss': 0.2532244523366292, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
100%|██████████| 1/1 [00:00<00:00, 153.57it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.11it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 20185.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Map: 100%|██████████| 91/91 [00:01<00:00, 60.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 285.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 321.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.45711997151374817, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.412, 'eval_steps_per_second': 11.412, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.69s/it]

{'eval_loss': 0.3653922379016876, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.028, 'eval_steps_per_second': 12.028, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.64s/it]

{'eval_loss': 0.35234901309013367, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.76, 'eval_steps_per_second': 11.76, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.4802, 'train_samples_per_second': 3.873, 'train_steps_per_second': 0.511, 'train_loss': 0.25293127695719403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.97it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 18193.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.68 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Map: 100%|██████████| 91/91 [00:01<00:00, 61.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 243.49 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.45711997151374817, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1281, 'eval_samples_per_second': 7.805, 'eval_steps_per_second': 7.805, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.62s/it]

{'eval_loss': 0.3653922379016876, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1097, 'eval_samples_per_second': 9.116, 'eval_steps_per_second': 9.116, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.64s/it]

{'eval_loss': 0.35234901309013367, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0862, 'eval_samples_per_second': 11.597, 'eval_steps_per_second': 11.597, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.672, 'train_samples_per_second': 3.918, 'train_steps_per_second': 0.517, 'train_loss': 0.25293127695719403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 163.66it/s]
100%|██████████| 1/1 [00:00<00:00, 166.71it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 18190.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 988.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Map: 100%|██████████| 91/91 [00:01<00:00, 60.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.45711997151374817, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.761, 'eval_steps_per_second': 11.761, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.62s/it]

{'eval_loss': 0.3653922379016876, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0834, 'eval_samples_per_second': 11.986, 'eval_steps_per_second': 11.986, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:07<00:00,  1.63s/it]

{'eval_loss': 0.35234901309013367, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.295, 'eval_steps_per_second': 12.295, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.92s/it]


{'train_runtime': 69.1591, 'train_samples_per_second': 3.947, 'train_steps_per_second': 0.521, 'train_loss': 0.25293127695719403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 125.02it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 18201.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1003.90 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 91/91 [00:01<00:00, 60.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.45711997151374817, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0856, 'eval_samples_per_second': 11.688, 'eval_steps_per_second': 11.688, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.64s/it]

{'eval_loss': 0.3653922379016876, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1253, 'eval_samples_per_second': 7.982, 'eval_steps_per_second': 7.982, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.64s/it]

{'eval_loss': 0.35234901309013367, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0863, 'eval_samples_per_second': 11.587, 'eval_steps_per_second': 11.587, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.588, 'train_samples_per_second': 3.923, 'train_steps_per_second': 0.517, 'train_loss': 0.25293127695719403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 133.18it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 91/91 [00:00<00:00, 18199.58 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1008.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.94 examples/s]
Map: 100%|██████████| 91/91 [00:01<00:00, 60.24 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.39 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.84 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 244.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.45711997151374817, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.553, 'eval_steps_per_second': 11.553, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:19,  1.63s/it]

{'eval_loss': 0.3653922379016876, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1345, 'eval_samples_per_second': 7.437, 'eval_steps_per_second': 7.437, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.64s/it]

{'eval_loss': 0.35234901309013367, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0795, 'eval_samples_per_second': 12.583, 'eval_steps_per_second': 12.583, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.94s/it]


{'train_runtime': 69.8796, 'train_samples_per_second': 3.907, 'train_steps_per_second': 0.515, 'train_loss': 0.25293127695719403, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.90it/s]
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.92it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 15332.01 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 986.66 examples/s]
Map: 100%|██████████| 92/92 [00:01<00:00, 59.38 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4572020173072815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0794, 'eval_samples_per_second': 12.589, 'eval_steps_per_second': 12.589, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.71s/it]

{'eval_loss': 0.3650875389575958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.1159, 'eval_samples_per_second': 8.632, 'eval_steps_per_second': 8.632, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.71s/it]

{'eval_loss': 0.35222122073173523, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0846, 'eval_samples_per_second': 11.821, 'eval_steps_per_second': 11.821, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.7313, 'train_samples_per_second': 3.902, 'train_steps_per_second': 0.509, 'train_loss': 0.2528192467159695, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 140.52it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 18400.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.04 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 92/92 [00:01<00:00, 60.86 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.68 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4572020173072815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0885, 'eval_samples_per_second': 11.3, 'eval_steps_per_second': 11.3, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.68s/it]

{'eval_loss': 0.3650875389575958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0932, 'eval_samples_per_second': 10.732, 'eval_steps_per_second': 10.732, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.65s/it]

{'eval_loss': 0.35222122073173523, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.1398, 'eval_samples_per_second': 7.153, 'eval_steps_per_second': 7.153, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.2993, 'train_samples_per_second': 3.926, 'train_steps_per_second': 0.512, 'train_loss': 0.2528192467159695, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 162.73it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 15322.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 92/92 [00:01<00:00, 59.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.14 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 246.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4572020173072815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0796, 'eval_samples_per_second': 12.562, 'eval_steps_per_second': 12.562, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.67s/it]

{'eval_loss': 0.3650875389575958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0866, 'eval_samples_per_second': 11.552, 'eval_steps_per_second': 11.552, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.65s/it]

{'eval_loss': 0.35222122073173523, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.429, 'eval_steps_per_second': 12.429, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.5184, 'train_samples_per_second': 3.97, 'train_steps_per_second': 0.518, 'train_loss': 0.2528192467159695, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.81it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 18410.11 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 927.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 92/92 [00:01<00:00, 58.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.08 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 324.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4572020173072815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.283, 'eval_steps_per_second': 11.283, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:44<00:19,  1.64s/it]

{'eval_loss': 0.3650875389575958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.3, 'eval_steps_per_second': 12.3, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.68s/it]

{'eval_loss': 0.35222122073173523, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.848, 'eval_steps_per_second': 11.848, 'epoch': 3.0}


100%|██████████| 36/36 [01:09<00:00,  1.93s/it]


{'train_runtime': 69.4204, 'train_samples_per_second': 3.976, 'train_steps_per_second': 0.519, 'train_loss': 0.2528192467159695, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.88it/s]
100%|██████████| 1/1 [00:00<00:00, 142.79it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 92/92 [00:00<00:00, 18403.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.07 examples/s]
Map: 100%|██████████| 92/92 [00:01<00:00, 57.75 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.60 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 287.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.4572020173072815, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.969, 'eval_steps_per_second': 11.969, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.69s/it]

{'eval_loss': 0.3650875389575958, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0886, 'eval_samples_per_second': 11.29, 'eval_steps_per_second': 11.29, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.67s/it]

{'eval_loss': 0.35222122073173523, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.838, 'eval_steps_per_second': 11.838, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.2541, 'train_samples_per_second': 3.929, 'train_steps_per_second': 0.512, 'train_loss': 0.2528192467159695, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
100%|██████████| 1/1 [00:00<00:00, 140.91it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 15283.09 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.38 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 93/93 [00:01<00:00, 60.19 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4573948085308075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0853, 'eval_samples_per_second': 11.72, 'eval_steps_per_second': 11.72, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.73s/it]

{'eval_loss': 0.3649231195449829, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.263, 'eval_steps_per_second': 12.263, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.78s/it]

{'eval_loss': 0.3520244061946869, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.277, 'eval_steps_per_second': 12.277, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.97s/it]


{'train_runtime': 70.8601, 'train_samples_per_second': 3.937, 'train_steps_per_second': 0.508, 'train_loss': 0.2528143988715278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.72it/s]
100%|██████████| 1/1 [00:00<00:00, 141.24it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 18283.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 995.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.69 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Map: 100%|██████████| 93/93 [00:01<00:00, 58.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4573948085308075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0755, 'eval_samples_per_second': 13.245, 'eval_steps_per_second': 13.245, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.76s/it]

{'eval_loss': 0.3649231195449829, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0812, 'eval_samples_per_second': 12.314, 'eval_steps_per_second': 12.314, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.73s/it]

{'eval_loss': 0.3520244061946869, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0809, 'eval_samples_per_second': 12.364, 'eval_steps_per_second': 12.364, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.97s/it]


{'train_runtime': 70.8263, 'train_samples_per_second': 3.939, 'train_steps_per_second': 0.508, 'train_loss': 0.2528143988715278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.91it/s]
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 109.71it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 18434.32 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Map: 100%|██████████| 93/93 [00:01<00:00, 58.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.97 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4573948085308075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.911, 'eval_steps_per_second': 12.911, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.74s/it]

{'eval_loss': 0.3649231195449829, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0844, 'eval_samples_per_second': 11.848, 'eval_steps_per_second': 11.848, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.78s/it]

{'eval_loss': 0.3520244061946869, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0752, 'eval_samples_per_second': 13.294, 'eval_steps_per_second': 13.294, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.714, 'train_samples_per_second': 3.945, 'train_steps_per_second': 0.509, 'train_loss': 0.2528143988715278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 18601.35 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1961.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Map: 100%|██████████| 93/93 [00:01<00:00, 57.88 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4573948085308075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.434, 'eval_steps_per_second': 11.434, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.74s/it]

{'eval_loss': 0.3649231195449829, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0845, 'eval_samples_per_second': 11.839, 'eval_steps_per_second': 11.839, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.76s/it]

{'eval_loss': 0.3520244061946869, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0822, 'eval_samples_per_second': 12.161, 'eval_steps_per_second': 12.161, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.6366, 'train_samples_per_second': 3.95, 'train_steps_per_second': 0.51, 'train_loss': 0.2528143988715278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 161.66it/s]
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.63it/s]
Casting the dataset: 100%|██████████| 93/93 [00:00<00:00, 20622.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 931.86 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 93/93 [00:01<00:00, 58.92 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 281.21 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4573948085308075, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.024, 'eval_steps_per_second': 12.024, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:20,  1.74s/it]

{'eval_loss': 0.3649231195449829, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0894, 'eval_samples_per_second': 11.182, 'eval_steps_per_second': 11.182, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.72s/it]

{'eval_loss': 0.3520244061946869, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0841, 'eval_samples_per_second': 11.897, 'eval_steps_per_second': 11.897, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.2695, 'train_samples_per_second': 3.97, 'train_steps_per_second': 0.512, 'train_loss': 0.2528143988715278, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.69it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.86it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 15666.56 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 940.85 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 58.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 245.70 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4571482539176941, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.981, 'eval_steps_per_second': 11.981, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.77s/it]

{'eval_loss': 0.36462920904159546, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.809, 'eval_steps_per_second': 12.809, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.78s/it]

{'eval_loss': 0.3517250120639801, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0826, 'eval_samples_per_second': 12.113, 'eval_steps_per_second': 12.113, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.96s/it]


{'train_runtime': 70.7196, 'train_samples_per_second': 3.988, 'train_steps_per_second': 0.509, 'train_loss': 0.25275452931722003, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.85it/s]
100%|██████████| 1/1 [00:00<00:00, 142.89it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.95it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 18804.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.75 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 996.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.27 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 58.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4571482539176941, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0782, 'eval_samples_per_second': 12.789, 'eval_steps_per_second': 12.789, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.79s/it]

{'eval_loss': 0.36462920904159546, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0815, 'eval_samples_per_second': 12.273, 'eval_steps_per_second': 12.273, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.79s/it]

{'eval_loss': 0.3517250120639801, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0755, 'eval_samples_per_second': 13.243, 'eval_steps_per_second': 13.243, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.2636, 'train_samples_per_second': 3.957, 'train_steps_per_second': 0.505, 'train_loss': 0.25275452931722003, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 166.65it/s]
100%|██████████| 1/1 [00:00<00:00, 142.83it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.82it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 15525.89 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.17 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.79 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 57.40 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 249.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.4571482539176941, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.278, 'eval_steps_per_second': 12.278, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:21,  1.79s/it]

{'eval_loss': 0.36462920904159546, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.13, 'eval_steps_per_second': 12.13, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:08<00:00,  1.75s/it]

{'eval_loss': 0.3517250120639801, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0817, 'eval_samples_per_second': 12.234, 'eval_steps_per_second': 12.234, 'epoch': 3.0}


100%|██████████| 36/36 [01:10<00:00,  1.95s/it]


{'train_runtime': 70.3199, 'train_samples_per_second': 4.01, 'train_steps_per_second': 0.512, 'train_loss': 0.25275452931722003, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.84it/s]
100%|██████████| 1/1 [00:00<00:00, 166.67it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.87it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 13426.80 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 906.29 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 57.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.54 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.4571482539176941, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0791, 'eval_samples_per_second': 12.648, 'eval_steps_per_second': 12.648, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.78s/it]

{'eval_loss': 0.36462920904159546, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0813, 'eval_samples_per_second': 12.306, 'eval_steps_per_second': 12.306, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.82s/it]

{'eval_loss': 0.3517250120639801, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.009, 'eval_steps_per_second': 12.009, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.6034, 'train_samples_per_second': 3.938, 'train_steps_per_second': 0.503, 'train_loss': 0.25275452931722003, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 124.83it/s]
100%|██████████| 1/1 [00:00<00:00, 146.48it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.27it/s]
Casting the dataset: 100%|██████████| 94/94 [00:00<00:00, 9493.95 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.88 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 94/94 [00:01<00:00, 58.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.12 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 499.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mod

{'eval_loss': 0.4571482539176941, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0814, 'eval_samples_per_second': 12.292, 'eval_steps_per_second': 12.292, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.82s/it]

{'eval_loss': 0.36462920904159546, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.916, 'eval_steps_per_second': 12.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.78s/it]

{'eval_loss': 0.3517250120639801, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.049, 'eval_steps_per_second': 12.049, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.3351, 'train_samples_per_second': 3.953, 'train_steps_per_second': 0.505, 'train_loss': 0.25275452931722003, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 145.65it/s]
100%|██████████| 1/1 [00:00<00:00, 169.98it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 104.77it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 10933.46 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 983.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 499.80 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 56.36 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 144.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 272.16 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 130.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4569409191608429, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.41, 'eval_steps_per_second': 12.41, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.82s/it]

{'eval_loss': 0.3646991550922394, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.001, 'eval_steps_per_second': 12.001, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.86s/it]

{'eval_loss': 0.35188034176826477, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0805, 'eval_samples_per_second': 12.417, 'eval_steps_per_second': 12.417, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.5652, 'train_samples_per_second': 3.982, 'train_steps_per_second': 0.503, 'train_loss': 0.25271855460272896, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 127.07it/s]
100%|██████████| 1/1 [00:00<00:00, 144.33it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.06it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 9739.18 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 967.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.51 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.46 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 57.00 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 534.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 64.48 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.4569409191608429, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.085, 'eval_samples_per_second': 11.77, 'eval_steps_per_second': 11.77, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:21,  1.82s/it]

{'eval_loss': 0.3646991550922394, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0746, 'eval_samples_per_second': 13.404, 'eval_steps_per_second': 13.404, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.85s/it]

{'eval_loss': 0.35188034176826477, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0892, 'eval_samples_per_second': 11.216, 'eval_steps_per_second': 11.216, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.5117, 'train_samples_per_second': 3.985, 'train_steps_per_second': 0.503, 'train_loss': 0.25271855460272896, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 145.46it/s]
100%|██████████| 1/1 [00:00<00:00, 167.95it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 127.54it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 14054.99 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.60 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 57.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 425.30 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 250.06 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 142.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAI

{'eval_loss': 0.4569409191608429, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0763, 'eval_samples_per_second': 13.11, 'eval_steps_per_second': 13.11, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.85s/it]

{'eval_loss': 0.3646991550922394, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0836, 'eval_samples_per_second': 11.962, 'eval_steps_per_second': 11.962, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.85s/it]

{'eval_loss': 0.35188034176826477, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0775, 'eval_samples_per_second': 12.907, 'eval_steps_per_second': 12.907, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.7073, 'train_samples_per_second': 3.974, 'train_steps_per_second': 0.502, 'train_loss': 0.25271855460272896, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 146.70it/s]
100%|██████████| 1/1 [00:00<00:00, 144.66it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 466.29it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 500.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.41 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 56.93 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 339.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 139.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN 

{'eval_loss': 0.4569409191608429, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0807, 'eval_samples_per_second': 12.398, 'eval_steps_per_second': 12.398, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.83s/it]

{'eval_loss': 0.3646991550922394, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.024, 'eval_steps_per_second': 12.024, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.84s/it]

{'eval_loss': 0.35188034176826477, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0725, 'eval_samples_per_second': 13.791, 'eval_steps_per_second': 13.791, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  2.00s/it]


{'train_runtime': 71.8609, 'train_samples_per_second': 3.966, 'train_steps_per_second': 0.501, 'train_loss': 0.25271855460272896, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.06it/s]
100%|██████████| 1/1 [00:00<00:00, 161.61it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 396.29it/s]
Casting the dataset: 100%|██████████| 95/95 [00:00<00:00, 8273.13 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.83 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.31 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 977.24 examples/s]
Map: 100%|██████████| 95/95 [00:01<00:00, 58.11 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.85 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 328.50 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 111.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sh

{'eval_loss': 0.4569409191608429, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0764, 'eval_samples_per_second': 13.093, 'eval_steps_per_second': 13.093, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:45<00:22,  1.84s/it]

{'eval_loss': 0.3646991550922394, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.083, 'eval_samples_per_second': 12.053, 'eval_steps_per_second': 12.053, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:09<00:00,  1.85s/it]

{'eval_loss': 0.35188034176826477, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0831, 'eval_samples_per_second': 12.03, 'eval_steps_per_second': 12.03, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.98s/it]


{'train_runtime': 71.2857, 'train_samples_per_second': 3.998, 'train_steps_per_second': 0.505, 'train_loss': 0.25271855460272896, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 142.04it/s]
100%|██████████| 1/1 [00:00<00:00, 146.02it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 188.69it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 13780.05 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 150.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 56.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.17 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 331.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRA

{'eval_loss': 0.45746371150016785, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0882, 'eval_samples_per_second': 11.343, 'eval_steps_per_second': 11.343, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.90s/it]

{'eval_loss': 0.3644806444644928, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0786, 'eval_samples_per_second': 12.721, 'eval_steps_per_second': 12.721, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.88s/it]

{'eval_loss': 0.3516612946987152, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0875, 'eval_samples_per_second': 11.432, 'eval_steps_per_second': 11.432, 'epoch': 3.0}


100%|██████████| 36/36 [01:12<00:00,  2.00s/it]


{'train_runtime': 72.0921, 'train_samples_per_second': 3.995, 'train_steps_per_second': 0.499, 'train_loss': 0.25276724497477215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.56it/s]
100%|██████████| 1/1 [00:00<00:00, 170.81it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 167.42it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 15949.19 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.03 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 57.13 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 317.73 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 351.28 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pr

{'eval_loss': 0.45746371150016785, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0854, 'eval_samples_per_second': 11.713, 'eval_steps_per_second': 11.713, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.90s/it]

{'eval_loss': 0.3644806444644928, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0804, 'eval_samples_per_second': 12.434, 'eval_steps_per_second': 12.434, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.87s/it]

{'eval_loss': 0.3516612946987152, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0794, 'eval_samples_per_second': 12.595, 'eval_steps_per_second': 12.595, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  2.00s/it]


{'train_runtime': 71.9748, 'train_samples_per_second': 4.001, 'train_steps_per_second': 0.5, 'train_loss': 0.25276724497477215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 149.02it/s]
100%|██████████| 1/1 [00:00<00:00, 149.99it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 116.41it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 16524.53 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 139.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 57.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 332.83 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this mo

{'eval_loss': 0.45746371150016785, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0765, 'eval_samples_per_second': 13.078, 'eval_steps_per_second': 13.078, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.88s/it]

{'eval_loss': 0.3644806444644928, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0793, 'eval_samples_per_second': 12.603, 'eval_steps_per_second': 12.603, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.90s/it]

{'eval_loss': 0.3516612946987152, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0876, 'eval_samples_per_second': 11.417, 'eval_steps_per_second': 11.417, 'epoch': 3.0}


100%|██████████| 36/36 [01:12<00:00,  2.01s/it]


{'train_runtime': 72.2112, 'train_samples_per_second': 3.988, 'train_steps_per_second': 0.499, 'train_loss': 0.25276724497477215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 134.11it/s]
100%|██████████| 1/1 [00:00<00:00, 144.35it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.68it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 16100.33 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.27 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 58.51 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.15 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 335.20 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 139.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You 

{'eval_loss': 0.45746371150016785, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0832, 'eval_samples_per_second': 12.013, 'eval_steps_per_second': 12.013, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.85s/it]

{'eval_loss': 0.3644806444644928, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.008, 'eval_steps_per_second': 12.008, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.88s/it]

{'eval_loss': 0.3516612946987152, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0899, 'eval_samples_per_second': 11.125, 'eval_steps_per_second': 11.125, 'epoch': 3.0}


100%|██████████| 36/36 [01:11<00:00,  1.99s/it]


{'train_runtime': 71.5862, 'train_samples_per_second': 4.023, 'train_steps_per_second': 0.503, 'train_loss': 0.25276724497477215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 147.67it/s]
100%|██████████| 1/1 [00:00<00:00, 144.85it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 129.56it/s]
Casting the dataset: 100%|██████████| 96/96 [00:00<00:00, 19248.20 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 143.17 examples/s]
Map: 100%|██████████| 96/96 [00:01<00:00, 56.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a 

{'eval_loss': 0.45746371150016785, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.442, 'eval_steps_per_second': 11.442, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 24/36 [00:46<00:22,  1.89s/it]

{'eval_loss': 0.3644806444644928, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0909, 'eval_samples_per_second': 10.996, 'eval_steps_per_second': 10.996, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 36/36 [01:10<00:00,  1.88s/it]

{'eval_loss': 0.3516612946987152, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0823, 'eval_samples_per_second': 12.144, 'eval_steps_per_second': 12.144, 'epoch': 3.0}


100%|██████████| 36/36 [01:12<00:00,  2.00s/it]


{'train_runtime': 72.0324, 'train_samples_per_second': 3.998, 'train_steps_per_second': 0.5, 'train_loss': 0.25276724497477215, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 146.04it/s]
100%|██████████| 1/1 [00:00<00:00, 146.02it/s]
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 145.55it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 13716.12 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 997.93 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 564.97 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1000.55 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 55.66 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 340.25 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.44040024280548096, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0842, 'eval_samples_per_second': 11.877, 'eval_steps_per_second': 11.877, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:48<00:20,  1.55s/it]

{'eval_loss': 0.3562794625759125, 'eval_precision': 1.0, 'eval_recall': 0.16666666666666666, 'eval_f1': 0.2857142857142857, 'eval_accuracy': 0.9285714285714286, 'eval_span_f1': 0.2857142857142857, 'eval_runtime': 0.0774, 'eval_samples_per_second': 12.916, 'eval_steps_per_second': 12.916, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:13<00:00,  1.53s/it]

{'eval_loss': 0.3458663821220398, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.007, 'eval_steps_per_second': 12.007, 'epoch': 3.0}


100%|██████████| 39/39 [01:14<00:00,  1.90s/it]


{'train_runtime': 74.2272, 'train_samples_per_second': 3.92, 'train_steps_per_second': 0.525, 'train_loss': 0.23295231354542267, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 146.18it/s]
100%|██████████| 1/1 [00:00<00:00, 84.63it/s]
100%|██████████| 1/1 [00:00<00:00, 149.83it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 13805.48 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 149.16 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 998.64 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 56.32 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 111.02 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 564.81 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 330.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You sho

{'eval_loss': 0.44592025876045227, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0781, 'eval_samples_per_second': 12.806, 'eval_steps_per_second': 12.806, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:48<00:19,  1.52s/it]

{'eval_loss': 0.3891408145427704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.136, 'eval_steps_per_second': 12.136, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:12<00:00,  1.54s/it]

{'eval_loss': 0.37858134508132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0835, 'eval_samples_per_second': 11.983, 'eval_steps_per_second': 11.983, 'epoch': 3.0}


100%|██████████| 39/39 [01:14<00:00,  1.90s/it]


{'train_runtime': 74.1645, 'train_samples_per_second': 3.924, 'train_steps_per_second': 0.526, 'train_loss': 0.34212339841402495, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 143.94it/s]
100%|██████████| 1/1 [00:00<00:00, 146.15it/s]
100%|██████████| 1/1 [00:00<00:00, 365.13it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 12446.39 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1002.22 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 939.37 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 57.95 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 320.62 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 142.09 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 337.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should pro

{'eval_loss': 0.44592025876045227, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0833, 'eval_samples_per_second': 12.001, 'eval_steps_per_second': 12.001, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:47<00:19,  1.51s/it]

{'eval_loss': 0.3891408145427704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0824, 'eval_samples_per_second': 12.143, 'eval_steps_per_second': 12.143, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:12<00:00,  1.55s/it]

{'eval_loss': 0.37858134508132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0806, 'eval_samples_per_second': 12.411, 'eval_steps_per_second': 12.411, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.90s/it]


{'train_runtime': 73.9544, 'train_samples_per_second': 3.935, 'train_steps_per_second': 0.527, 'train_loss': 0.34212339841402495, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.83it/s]
100%|██████████| 1/1 [00:00<00:00, 169.64it/s]
100%|██████████| 1/1 [00:00<00:00, 86.43it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 8892.84 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1145.98 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1961.79 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 671.20 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 57.05 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 128.44 examples/s]
Map: 100%|██████████| 1/1 [00:00<?, ? examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should prob

{'eval_loss': 0.44592025876045227, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0793, 'eval_samples_per_second': 12.604, 'eval_steps_per_second': 12.604, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:48<00:19,  1.53s/it]

{'eval_loss': 0.3891408145427704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0783, 'eval_samples_per_second': 12.764, 'eval_steps_per_second': 12.764, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:13<00:00,  1.57s/it]

{'eval_loss': 0.37858134508132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.087, 'eval_samples_per_second': 11.491, 'eval_steps_per_second': 11.491, 'epoch': 3.0}


100%|██████████| 39/39 [01:14<00:00,  1.91s/it]


{'train_runtime': 74.4009, 'train_samples_per_second': 3.911, 'train_steps_per_second': 0.524, 'train_loss': 0.34212339841402495, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 155.32it/s]
100%|██████████| 1/1 [00:00<00:00, 143.38it/s]
100%|██████████| 1/1 [00:00<00:00, 151.07it/s]
Casting the dataset: 100%|██████████| 97/97 [00:00<00:00, 16022.66 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 497.49 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 1001.74 examples/s]
Casting the dataset: 100%|██████████| 1/1 [00:00<00:00, 999.36 examples/s]
Map: 100%|██████████| 97/97 [00:01<00:00, 56.10 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 252.26 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 329.82 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 333.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You s

{'eval_loss': 0.44592025876045227, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0874, 'eval_samples_per_second': 11.435, 'eval_steps_per_second': 11.435, 'epoch': 1.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

 67%|██████▋   | 26/39 [00:48<00:19,  1.53s/it]

{'eval_loss': 0.3891408145427704, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0929, 'eval_samples_per_second': 10.76, 'eval_steps_per_second': 10.76, 'epoch': 2.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))

100%|██████████| 39/39 [01:12<00:00,  1.51s/it]

{'eval_loss': 0.37858134508132935, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.9206349206349206, 'eval_span_f1': 0.0, 'eval_runtime': 0.0945, 'eval_samples_per_second': 10.577, 'eval_steps_per_second': 10.577, 'epoch': 3.0}


100%|██████████| 39/39 [01:13<00:00,  1.90s/it]


{'train_runtime': 73.9703, 'train_samples_per_second': 3.934, 'train_steps_per_second': 0.527, 'train_loss': 0.34212339841402495, 'epoch': 3.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
100%|██████████| 1/1 [00:00<00:00, 144.70it/s]
100%|██████████| 1/1 [00:00<00:00, 166.93it/s]
100%|██████████| 1/1 [00:00<00:00, 140.26it/s]
