In [1]:
import re
import random
import os

from tqdm import tqdm
import pandas as pd
import torch

from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForTokenClassification, RobertaTokenizerFast, get_scheduler
from transformers import DataCollatorForTokenClassification
from datasets import Dataset, DatasetDict, Features, Sequence, Value, ClassLabel
from accelerate import Accelerator

from sklearn.metrics import precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configure PyTorch's CUDA allocator with expandable segments; the aim is less
# memory fragmentation and therefore lower VRAM usage.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})


## Combine all .txt and .ann files per medicine

In [3]:
# Base folders containing annotation and text files
annotations_folder = 'annotations/'
original_texts_folder = 'originaltexts/'
output_folder = 'output_datasets/'
os.makedirs(output_folder, exist_ok=True)

# Un-annotated text is tokenized into words (optionally with an apostrophe
# suffix, e.g. "don't") and the punctuation marks . , ! ?
outside_token_pattern = re.compile(r"\w+(?:'\w+)?|[.,!?]")


def _parse_text_bound_annotation(line):
    """Parse one line of a .ann file into ``(start, end, tag, text)``.

    Only text-bound annotations (lines starting with ``T`` that consist of
    three tab-separated fields) are parsed; every other line, such as an
    AnnotatorNotes line, yields ``None``.

    The offset field is either ``"start end"`` or, for a discontinuous
    annotation, several fragments separated by ``;`` such as
    ``"9 19;29 50"``. The returned span runs from the start of the first
    fragment to the end of the LAST fragment, so the caller's text cursor
    moves past the whole annotation.

    Raises:
        ValueError: If the offsets are not integers in the expected layout.
    """
    if not line.startswith('T'):
        return None
    parts = line.strip().split('\t')
    if len(parts) != 3:
        return None
    tag_info, word = parts[1], parts[2]
    tag_parts = tag_info.split()
    if len(tag_parts) < 3:
        return None
    tag = tag_parts[0]
    try:
        fragments = [
            tuple(int(offset) for offset in fragment.split())
            for fragment in ' '.join(tag_parts[1:]).split(';')
        ]
    except ValueError:
        raise ValueError(f"Unexpected annotation format: {tag_parts}") from None
    if any(len(fragment) != 2 for fragment in fragments):
        raise ValueError(f"Unexpected annotation format: {tag_parts}")
    return fragments[0][0], fragments[-1][1], tag, word


# Group files by medicine. The listing is sorted so that the order of posts in
# the generated files (and therefore every seeded split taken from them later)
# does not depend on the file system's listing order.
file_groups = {}
for file_name in sorted(os.listdir(annotations_folder)):
    if file_name.endswith('.ann'):
        base_name = file_name[:-len('.ann')]
        medicine = base_name.rsplit('.', 1)[0]
        file_groups.setdefault(medicine, []).append(file_name)

# Process each group
for medicine, ann_files in file_groups.items():
    combined_output = []

    for ann_file in ann_files:
        # Swap only the extension; a plain str.replace would also rewrite any
        # other '.ann' occurring inside the file name.
        txt_file = ann_file[:-len('.ann')] + '.txt'
        txt_path = os.path.join(original_texts_folder, txt_file)
        ann_path = os.path.join(annotations_folder, ann_file)

        # Ensure the corresponding .txt file exists
        if not os.path.exists(txt_path):
            raise FileNotFoundError(f"Text file not found for annotation file {ann_file}")

        # Read the content of the .ann and .txt files. The encoding is explicit
        # so the result does not depend on the platform default and matches
        # the UTF-8 used when the IOB file is read back.
        with open(ann_path, 'r', encoding='utf-8') as ann_f:
            ann_lines = ann_f.readlines()

        with open(txt_path, 'r', encoding='utf-8') as txt_f:
            txt_content = txt_f.read()

        # Parse annotations and filter out AnnotatorNotes
        annotations = []
        for line in ann_lines:
            parsed = _parse_text_bound_annotation(line)
            if parsed is not None:
                annotations.append(parsed)

        # Sort annotations by start index
        annotations.sort(key=lambda x: x[0])

        # Generate output format: one "<token> <tag>" line per token
        output = []
        current_idx = 0
        for start_idx, end_idx, tag, word in annotations:
            # Add text between the last annotation and the current annotation as "O"
            if current_idx < start_idx:
                intervening_text = txt_content[current_idx:start_idx]
                for token in outside_token_pattern.findall(intervening_text):
                    output.append(f"{token} O")

            # Add the annotated word with its tag (whitespace-tokenized,
            # first token B-, the rest I-)
            for i, token in enumerate(word.split()):
                tag_prefix = 'B-' if i == 0 else 'I-'
                output.append(f"{token} {tag_prefix}{tag}")

            # Never move the cursor backwards: an annotation nested inside or
            # overlapping an earlier one would otherwise cause text that is
            # already covered to be emitted a second time as "O" tokens.
            # NOTE: text lying in the gap between the fragments of a
            # discontinuous annotation is skipped, not emitted.
            current_idx = max(current_idx, end_idx)

        # Add remaining text as "O"
        if current_idx < len(txt_content):
            remaining_text = txt_content[current_idx:]
            for token in outside_token_pattern.findall(remaining_text):
                output.append(f"{token} O")

        # Add to combined output with a newline separator
        combined_output.extend(output)
        combined_output.append('')  # Empty line between posts

    # Write combined output to file
    combined_output_text = '\n'.join(combined_output).strip()
    output_file = os.path.join(output_folder, f"{medicine}_combined_output.txt")
    with open(output_file, 'w', encoding='utf-8') as out_f:
        out_f.write(combined_output_text)

    print(f"Combined output saved for {medicine} in {output_file}")

Combined output saved for LIPITOR in output_datasets/LIPITOR_combined_output.txt
Combined output saved for VOLTAREN-XR in output_datasets/VOLTAREN-XR_combined_output.txt
Combined output saved for VOLTAREN in output_datasets/VOLTAREN_combined_output.txt
Combined output saved for ZIPSOR in output_datasets/ZIPSOR_combined_output.txt
Combined output saved for ARTHROTEC in output_datasets/ARTHROTEC_combined_output.txt
Combined output saved for CATAFLAM in output_datasets/CATAFLAM_combined_output.txt
Combined output saved for PENNSAID in output_datasets/PENNSAID_combined_output.txt
Combined output saved for DICLOFENAC-POTASSIUM in output_datasets/DICLOFENAC-POTASSIUM_combined_output.txt
Combined output saved for SOLARAZE in output_datasets/SOLARAZE_combined_output.txt
Combined output saved for DICLOFENAC-SODIUM in output_datasets/DICLOFENAC-SODIUM_combined_output.txt
Combined output saved for CAMBIA in output_datasets/CAMBIA_combined_output.txt
Combined output saved for FLECTOR in output_dat

## Combine all the medicine files into one dataset

In [4]:
# Folder containing all combined output files
output_datasets_folder = 'output_datasets/'
final_output_file = 'final_dataset.txt'

# Ensure the folder exists
if not os.path.exists(output_datasets_folder):
    raise FileNotFoundError(f"The folder {output_datasets_folder} does not exist.")

# List all per-medicine files in the folder. Sorted so the order of posts in
# the final dataset (and therefore every index-based, seeded split taken from
# it) does not depend on the file system's listing order.
output_files = sorted(
    f for f in os.listdir(output_datasets_folder) if f.endswith('_combined_output.txt')
)

# Combine all files into a single final dataset
final_dataset = []
for file_name in output_files:
    file_path = os.path.join(output_datasets_folder, file_name)
    # Explicit encoding: the final file is read back as UTF-8 later on
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read().strip()  # Read and strip any trailing spaces or newlines
        final_dataset.append(content)

    # Add an empty line to separate posts from different files
    final_dataset.append('')

# Write the combined dataset to the final output file
with open(final_output_file, 'w', encoding='utf-8') as f:
    f.write('\n'.join(final_dataset).strip())  # Ensure no extra trailing newline

print(f"Final dataset saved to {final_output_file}")

Final dataset saved to final_dataset.txt


## Read the final dataset into the IOB dataset format

In [5]:
def read_iob_file(file_path):
    """Parse an IOB file into a list of sentences.

    Every non-blank line must hold exactly two whitespace-separated fields,
    ``<token> <tag>``; blank lines separate sentences.

    Args:
        file_path (str): Path of the UTF-8 encoded IOB file.

    Returns:
        List[dict]: One ``{"tokens": [...], "ner_tags": [...]}`` dict per
        sentence, in file order. Runs of blank lines produce no empty sentences.

    Raises:
        ValueError: If a non-blank line does not split into exactly two fields.
    """
    parsed = []
    tokens, tags = [], []

    def flush():
        # Close the sentence being collected, if it has any tokens.
        nonlocal tokens, tags
        if tokens:
            parsed.append({"tokens": tokens, "ner_tags": tags})
            tokens, tags = [], []

    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            stripped = raw_line.strip()
            if not stripped:
                flush()
                continue
            token, tag = stripped.split()
            tokens.append(token)
            tags.append(tag)
        # The file may end without a trailing blank line
        flush()

    return parsed

def create_dataset_from_final_file(final_file_path):
    """Load one IOB file into a ``DatasetDict`` with a single ``"full_data"`` split.

    The tag vocabulary is collected from the file and sorted as plain strings,
    so class ids follow that string order.

    Args:
        final_file_path (str): Path of the IOB file to load.

    Returns:
        DatasetDict: ``{"full_data": Dataset}`` with a string sequence column
        ``tokens`` and a ``ClassLabel`` sequence column ``ner_tags``.

    Raises:
        FileNotFoundError: If ``final_file_path`` does not exist.
    """
    if not os.path.exists(final_file_path):
        raise FileNotFoundError(f"The file {final_file_path} does not exist.")

    sentences = read_iob_file(final_file_path)

    # Every distinct tag that occurs anywhere in the file
    tag_vocabulary = set()
    for sentence in sentences:
        tag_vocabulary.update(sentence["ner_tags"])

    # Schema for Hugging Face datasets: string tokens, class-encoded tags
    schema = Features({
        'tokens': Sequence(Value("string")),
        'ner_tags': Sequence(ClassLabel(names=sorted(tag_vocabulary)))
    })

    full_data = Dataset.from_list(sentences).cast(schema)
    return DatasetDict({"full_data": full_data})


In [6]:
# Build the dataset from the merged IOB file and keep a direct handle on its
# only split, "full_data".
final_dataset_path = "final_dataset.txt"
dataset_dict = create_dataset_from_final_file(final_dataset_path)
dataset = dataset_dict["full_data"]

Casting the dataset: 100%|██████████| 1248/1248 [00:00<00:00, 12309.59 examples/s]


## Dataset generators

In [7]:
def generate_train_datasets(dataset_, number_of_samples, number_of_splits):
    """
    Build one randomly sampled training subset per seed.

    For each seed in ``0 .. number_of_splits - 1`` the global ``random`` module
    is re-seeded, all indices of ``dataset_`` are shuffled, and the first
    ``number_of_samples`` of them are selected.

    Args:
        dataset_ (Dataset): The base dataset to sample from.
        number_of_samples (int): Number of samples per dataset.
        number_of_splits (int): Number of datasets to generate (one per seed).

    Returns:
        List[Tuple[str, Dataset, List[int]]]: ``(name, subset, indices)`` per
        seed, where ``name`` is ``train_dataset_<number_of_samples>_<seed>``.
    """
    population = len(dataset_)

    def draw(seed):
        # Seeding the global RNG makes the permutation reproducible per seed
        random.seed(seed)
        order = list(range(population))
        random.shuffle(order)
        return order[:number_of_samples]

    generated = []
    for seed in range(number_of_splits):
        chosen = draw(seed)
        name = f"train_dataset_{number_of_samples}_{seed}"
        generated.append((name, dataset_.select(chosen), chosen))

    return generated

In [8]:
def generate_validation_datasets(dataset_, train_indices, number_of_samples, number_of_splits):
    """
    Generates validation datasets by sampling from the examples that are not used for training.

    Each validation dataset holds ``int(number_of_samples / 5)`` examples, i.e.
    one fifth of the training size passed in ``number_of_samples``.

    Args:
        dataset_ (Dataset): The base dataset to sample from.
        train_indices (List[int]): Indices of the training dataset to exclude from sampling.
        number_of_samples (int): Size of the matching TRAINING dataset; the
            validation size is derived from it (see above).
        number_of_splits (int): Number of validation datasets to generate (different seeds).

    Returns:
        List[Tuple[str, Dataset, List[int]]]: ``(name, subset, indices)`` per
        seed, where ``name`` is ``val_dataset_<validation size>_<seed>``.
    """
    datasets = []

    # Sorted so the starting order of the candidate pool is explicit and does
    # not rely on the iteration order of a set.
    available_indices = sorted(set(range(len(dataset_))) - set(train_indices))

    # Computed once and used for both the slice and the name, so the name
    # always states the real size (previously it embedded a float such as 6.0).
    validation_size = int(number_of_samples / 5)

    for seed in range(number_of_splits):
        # Set the random seed for reproducibility
        random.seed(seed)

        # The pool is shuffled in place, so the permutation used for seed k is
        # applied on top of the permutations of seeds 0 .. k-1.
        random.shuffle(available_indices)
        sampled_indices = available_indices[:validation_size]

        sampled_dataset = dataset_.select(sampled_indices)

        # Add the dataset with its name and indices
        datasets.append((f"val_dataset_{validation_size}_{seed}", sampled_dataset, sampled_indices))

    return datasets

In [9]:
def generate_test_datasets(dataset_, train_indices, val_indices, number_of_samples, number_of_splits):
    """
    Generates test datasets from every example that is in neither the training nor the validation data.

    No sampling takes place: each returned dataset contains ALL remaining
    examples, in ascending index order. ``number_of_samples`` only appears in
    the dataset name, and every seed receives the same held-out dataset.

    Args:
        dataset_ (Dataset): The base dataset to take the examples from.
        train_indices (List[int]): Indices of the training dataset to exclude.
        val_indices (List[int]): Indices of the validation dataset to exclude.
        number_of_samples (int): Used for naming only (the matching train size).
        number_of_splits (int): Number of (name, dataset) entries to return.

    Returns:
        List[Tuple[str, Dataset]]: ``(name, held_out_dataset)`` per seed, where
        ``name`` is ``test_dataset_<number_of_samples>_<seed>``.
    """
    # Sorted so the example order is explicit and does not rely on the
    # iteration order of a set.
    held_out_indices = sorted(
        set(range(len(dataset_))) - set(train_indices) - set(val_indices)
    )

    # The selection is the same for every seed, so it is built only once.
    held_out_dataset = dataset_.select(held_out_indices)

    return [
        (f"test_dataset_{number_of_samples}_{seed}", held_out_dataset)
        for seed in range(number_of_splits)
    ]

### Example usage

In [11]:
# Step 1: Generate Train Dataset
# train_datasets = generate_train_datasets(dataset, number_of_samples=30, number_of_splits=1)
# train_name, train_dataset, train_indices = train_datasets[0]
# print(f"{train_name}: {len(train_dataset)} samples")
#
# # Step 2: Generate Validation Dataset
# val_datasets = generate_validation_datasets(dataset, train_indices, number_of_samples=30, number_of_splits=1)
# val_name, val_dataset, val_indices = val_datasets[0]
# print(f"{val_name}: {len(val_dataset)} samples")
#
# # Step 3: Generate Test Dataset
# test_datasets = generate_test_datasets(dataset, train_indices, val_indices, number_of_samples=30, number_of_splits=1)
# test_name, test_dataset = test_datasets[0]
# print(f"{test_name}: {len(test_dataset)} samples")

# Prepping for training

In [10]:
# Label vocabulary of the `ner_tags` feature, plus lookups in both directions
# (class id -> label name and label name -> class id).
label_names = dataset.features["ner_tags"].feature.names
id2label = dict(enumerate(label_names))
label2id = {name: idx for idx, name in id2label.items()}

In [11]:
def postprocess(predictions, labels):
    """Flatten a batch of predictions and labels, dropping ignored positions.

    Positions whose label is ``-100`` are removed from both outputs; the
    remaining values keep their row-by-row order.

    Args:
        predictions (torch.Tensor): Predicted class ids, one row per sequence.
        labels (torch.Tensor): Gold class ids, with ``-100`` at positions to ignore.

    Returns:
        Tuple[list, list]: ``(true_labels, true_predictions)`` as flat lists.
    """
    ignore_index = -100
    prediction_rows = predictions.detach().cpu().clone().numpy()
    label_rows = labels.detach().cpu().clone().numpy()

    kept_labels = []
    for label_row in label_rows:
        kept_labels.extend(gold for gold in label_row if gold != ignore_index)

    kept_predictions = []
    for prediction_row, label_row in zip(prediction_rows, label_rows):
        kept_predictions.extend(
            guess for guess, gold in zip(prediction_row, label_row) if gold != ignore_index
        )

    return kept_labels, kept_predictions


# Training and evaluation

In [12]:
def _build_continuation_label_map(label_names):
    """Map the id of every ``B-X`` label to the id of its ``I-X`` counterpart.

    Args:
        label_names (Sequence[str]): Label names, indexed by class id.

    Returns:
        Dict[int, int]: ``{id of "B-X": id of "I-X"}`` for each ``B-X`` whose
        ``I-X`` partner is present in ``label_names``.
    """
    name_to_id = {name: idx for idx, name in enumerate(label_names)}
    return {
        idx: name_to_id["I-" + name[2:]]
        for name, idx in name_to_id.items()
        if name.startswith("B-") and "I-" + name[2:] in name_to_id
    }


def _align_labels_with_tokens(word_labels, word_ids, continuation_map):
    """Expand word-level labels to one label per tokenizer token.

    Args:
        word_labels (Sequence[int]): Class id of every word of one example.
        word_ids (Sequence[Optional[int]]): For each token, the index of the
            word it came from, or ``None`` for a special token.
        continuation_map (Dict[int, int]): ``B-X`` id -> ``I-X`` id.

    Returns:
        List[int]: ``-100`` for special tokens, the word's label for the first
        token of a word, and for any further token of the same word the word's
        label with ``B-X`` turned into ``I-X``.
    """
    aligned = []
    previous_word = None
    for word_id in word_ids:
        if word_id is None:
            # Special token: ignored by the loss and by postprocess()
            aligned.append(-100)
        elif word_id != previous_word:
            # First token of a new word
            aligned.append(word_labels[word_id])
        else:
            # Same word as the previous token: B-XXX becomes I-XXX
            label = word_labels[word_id]
            aligned.append(continuation_map.get(label, label))
        previous_word = word_id
    return aligned


def _collect_token_predictions(model, dataloader, device, accelerator):
    """Run ``model`` over ``dataloader`` and return flattened ``(labels, predictions)``."""
    gold, predicted = [], []
    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = model(**batch).logits
            batch_predictions = accelerator.gather(logits.argmax(dim=-1))
            batch_labels = accelerator.gather(batch["labels"])

            flat_labels, flat_predictions = postprocess(batch_predictions, batch_labels)
            gold.extend(flat_labels)
            predicted.extend(flat_predictions)
    return gold, predicted


def iterate_and_finetune_with_torch(
    dataset,
    file_name,
    models,
    start_size=5,
    end_size=500,
    step_size=5,
    k_splits=5,
    batch_size=8,
    learning_rate=5e-5,
    weight_decay=0.0,
    num_epochs=3,
):
    """
    Fine-tune models on training sets of increasing size and save the test scores to Excel.

    For every training size, every split and every model, a fresh model is
    fine-tuned, evaluated on the held-out examples, and one row is appended to
    the results file. The file is rewritten after every run, so results
    gathered so far survive an interrupted experiment.

    The splits are repeated random sub-samples drawn with different seeds (see
    the dataset generator functions), not the folds of a k-fold partition.

    Parameters:
    - dataset (Dataset): Full dataset with a `tokens` column and a `ner_tags`
      column of ClassLabel ids; train, validation and test data are drawn from it.
    - file_name (str): Excel file to save results. Existing content is kept and extended.
    - models (dict): Maps a size label to a model name. The label "large"
      selects the `roberta-large` fast tokenizer with `add_prefix_space=True`;
      every other label loads the tokenizer that belongs to the model name.
    - start_size (int): Starting size for training datasets.
    - end_size (int): Maximum size for training datasets (inclusive).
    - step_size (int): Step size for increasing dataset sizes.
    - k_splits (int): Number of random splits (seeds) per training size.
    - batch_size (int): Batch size for training and evaluation.
    - learning_rate (float): Learning rate for fine-tuning.
    - weight_decay (float): Weight decay for optimizer.
    - num_epochs (int): Number of training epochs.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Label maps are taken from the dataset that was passed in, so the function
    # does not depend on notebook-level variables having been defined.
    label_names = dataset.features["ner_tags"].feature.names
    id2label = dict(enumerate(label_names))
    label2id = {name: idx for idx, name in id2label.items()}

    # B-X -> I-X lookup built from the label NAMES. An arithmetic rule on the
    # ids (odd id = B, id + 1 = I) only holds for an interleaved label list;
    # when the names are in sorted order (all B-* first, then I-*, then O) it
    # would relabel continuation tokens with unrelated classes.
    continuation_map = _build_continuation_label_map(label_names)

    # Check or create the results file
    if os.path.exists(file_name):
        results_df = pd.read_excel(file_name)
    else:
        results_df = pd.DataFrame(columns=["Train Size", "K-Fold", "Test F1", "Model"])

    for train_size in range(start_size, end_size + 1, step_size):
        for split in range(k_splits):
            for size, model_name in models.items():
                print(f"\nFine-tuning {model_name} ({size}) with Train Size {train_size}, Split {split + 1}...")

                # Initialize tokenizer
                if size == "large":
                    tokenizer = RobertaTokenizerFast.from_pretrained("roberta-large", add_prefix_space=True)
                else:
                    tokenizer = AutoTokenizer.from_pretrained(model_name)

                data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

                # Generate datasets. Each generator builds all k_splits variants;
                # only the one that belongs to the current split is used.
                train_datasets = generate_train_datasets(
                    dataset, number_of_samples=train_size, number_of_splits=k_splits
                )
                _, train_dataset, train_indices = train_datasets[split]

                val_datasets = generate_validation_datasets(
                    dataset, train_indices=train_indices, number_of_samples=train_size, number_of_splits=k_splits
                )
                _, val_dataset, val_indices = val_datasets[split]

                test_datasets = generate_test_datasets(
                    dataset, train_indices=train_indices, val_indices=val_indices,
                    number_of_samples=train_size, number_of_splits=k_splits
                )
                _, test_dataset = test_datasets[split]

                def tokenize_and_align_labels(examples):
                    # Tokenize pre-split words and give every resulting token a label
                    tokenized_inputs = tokenizer(
                        examples["tokens"], truncation=True,
                        is_split_into_words=True
                    )
                    tokenized_inputs["labels"] = [
                        _align_labels_with_tokens(
                            word_labels, tokenized_inputs.word_ids(i), continuation_map
                        )
                        for i, word_labels in enumerate(examples["ner_tags"])
                    ]
                    return tokenized_inputs

                # Tokenize datasets
                tokenized_train = train_dataset.map(tokenize_and_align_labels, batched=True,  remove_columns=dataset.column_names)
                tokenized_val = val_dataset.map(tokenize_and_align_labels, batched=True,  remove_columns=dataset.column_names)
                tokenized_test = test_dataset.map(tokenize_and_align_labels, batched=True,  remove_columns=dataset.column_names)

                train_dataloader = DataLoader(tokenized_train, batch_size=batch_size, shuffle=True, collate_fn=data_collator)
                val_dataloader = DataLoader(tokenized_val, batch_size=batch_size, collate_fn=data_collator)
                test_dataloader = DataLoader(tokenized_test, batch_size=batch_size, collate_fn=data_collator)

                # Initialize the model for token classification
                model = AutoModelForTokenClassification.from_pretrained(
                    model_name, id2label=id2label, label2id=label2id
                )

                # Optimize GPU RAM at the cost of some speed
                model.gradient_checkpointing_enable()

                optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
                num_training_steps = num_epochs * len(train_dataloader)
                lr_scheduler = get_scheduler(
                    "linear",
                    optimizer=optimizer,
                    num_warmup_steps=0,
                    num_training_steps=num_training_steps
                )

                accelerator = Accelerator()
                model, optimizer, train_dataloader, val_dataloader = accelerator.prepare(
                    model, optimizer, train_dataloader, val_dataloader
                )

                # Training loop
                for epoch in range(num_epochs):
                    print(f"Epoch {epoch + 1}/{num_epochs}")
                    model.train()
                    total_loss = 0  # sum (not mean) of the batch losses of this epoch
                    progress_bar = tqdm(train_dataloader, desc=f"Training Epoch {epoch+1}")
                    for batch in progress_bar:
                        batch = {k: v.to(device) for k, v in batch.items()}
                        outputs = model(**batch)
                        loss = outputs.loss
                        total_loss += loss.item()

                        accelerator.backward(loss)
                        optimizer.step()
                        lr_scheduler.step()
                        optimizer.zero_grad()
                        progress_bar.set_postfix(loss=loss.item())

                    print(f"Epoch {epoch + 1} Loss: {total_loss:.4f}")

                model.eval()

                # Validation: reported for monitoring only, not stored in the results file
                val_labels, val_predictions = _collect_token_predictions(
                    model, val_dataloader, device, accelerator
                )
                if val_labels:
                    val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(
                        val_labels, val_predictions, average="micro"
                    )
                    print(
                        f"Validation Metrics: Precision={val_precision:.4f}, "
                        f"Recall={val_recall:.4f}, F1={val_f1:.4f}"
                    )

                # Test
                test_labels, test_predictions = _collect_token_predictions(
                    model, test_dataloader, device, accelerator
                )

                # Calculate test metrics.
                # NOTE(review): micro-averaging over every evaluated token,
                # including the "O" class, makes precision, recall and F1 all
                # equal to token accuracy. Entity-level or non-"O" scoring would
                # need a different metric setup; left unchanged so that new
                # rows stay comparable with rows already in the results file.
                precision, recall, f1, _ = precision_recall_fscore_support(
                    test_labels, test_predictions, average="micro"
                )
                print(f"Test Metrics: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}")

                # Append results and save immediately (acts as a checkpoint)
                new_row = pd.DataFrame(
                    [{"Train Size": train_size, "K-Fold": split + 1, "Test F1": f1, "Model": model_name}]
                )
                results_df = pd.concat([results_df, new_row], ignore_index=True)
                results_df.to_excel(file_name, index=False)

                # Cleanup: drop the references the Accelerator keeps to the
                # prepared objects, then the local ones, so the GPU memory of
                # this run can actually be returned before the next model loads.
                accelerator.free_memory()
                del model, optimizer, lr_scheduler, accelerator
                torch.cuda.empty_cache()

    print(f"Results saved to {file_name}")


In [None]:
# Size label -> model name for the three models compared at a training size of 500
models = dict(
    small="bert-base-cased",
    medium="bert-large-cased",
    large="roberta-large",
)

iterate_and_finetune_with_torch(
    dataset=dataset,
    file_name='Experiments_full_labeled.xlsx',
    models=models,
    start_size=500,
    end_size=500,
    step_size=5,
)


Fine-tuning bert-base-cased (small) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 8145.36 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7725.74 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8476.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.17it/s, loss=0.34] 


Epoch 1 Loss: 36.8305
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.31it/s, loss=0.414]


Epoch 2 Loss: 17.6808
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.53it/s, loss=0.147] 


Epoch 3 Loss: 12.8263
Test Metrics: Precision=0.9122, Recall=0.9122, F1=0.9122

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 1...


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [15]:
# RoBERTa-large only, 4 epochs, training sizes 230 to 500 in steps of 5
models = dict(large="roberta-large")

iterate_and_finetune_with_torch(
    dataset=dataset,
    file_name='Experiments_epoch4_roberta.xlsx',
    models=models,
    start_size=230,
    end_size=500,
    step_size=5,
    num_epochs=4,
)


Fine-tuning roberta-large (large) with Train Size 230, Split 1...


Map: 100%|██████████| 230/230 [00:00<00:00, 6874.63 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6216.39 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9899.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 29/29 [00:12<00:00,  2.32it/s, loss=0.451]


Epoch 1 Loss: 17.1539
Epoch 2/4


Training Epoch 2: 100%|██████████| 29/29 [00:12<00:00,  2.29it/s, loss=0.26] 


Epoch 2 Loss: 7.8948
Epoch 3/4


Training Epoch 3: 100%|██████████| 29/29 [00:12<00:00,  2.27it/s, loss=0.193]


Epoch 3 Loss: 5.3007
Epoch 4/4


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.23it/s, loss=0.119] 


Epoch 4 Loss: 3.8220
Test Metrics: Precision=0.9248, Recall=0.9248, F1=0.9248

Fine-tuning roberta-large (large) with Train Size 230, Split 2...


Map: 100%|██████████| 230/230 [00:00<00:00, 8573.65 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 4918.62 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9930.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 29/29 [00:13<00:00,  2.15it/s, loss=0.318]


Epoch 1 Loss: 16.1513
Epoch 2/4


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.13it/s, loss=0.271]


Epoch 2 Loss: 7.9576
Epoch 3/4


Training Epoch 3: 100%|██████████| 29/29 [00:13<00:00,  2.09it/s, loss=0.188]


Epoch 3 Loss: 5.4727
Epoch 4/4


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.18it/s, loss=0.115] 


Epoch 4 Loss: 3.7431
Test Metrics: Precision=0.9282, Recall=0.9282, F1=0.9282

Fine-tuning roberta-large (large) with Train Size 230, Split 3...


Map: 100%|██████████| 230/230 [00:00<00:00, 8432.68 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6335.81 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9513.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 29/29 [00:14<00:00,  2.05it/s, loss=0.38] 


Epoch 1 Loss: 15.8315
Epoch 2/4


Training Epoch 2: 100%|██████████| 29/29 [00:14<00:00,  2.02it/s, loss=0.323]


Epoch 2 Loss: 10.8092
Epoch 3/4


Training Epoch 3: 100%|██████████| 29/29 [00:14<00:00,  2.00it/s, loss=0.12] 


Epoch 3 Loss: 6.0276
Epoch 4/4


Training Epoch 4: 100%|██████████| 29/29 [00:14<00:00,  1.97it/s, loss=0.146] 


Epoch 4 Loss: 4.6522
Test Metrics: Precision=0.9239, Recall=0.9239, F1=0.9239

Fine-tuning roberta-large (large) with Train Size 230, Split 4...


Map: 100%|██████████| 230/230 [00:00<00:00, 8787.40 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6120.35 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9395.49 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 29/29 [00:14<00:00,  1.97it/s, loss=0.718]


Epoch 1 Loss: 21.8701
Epoch 2/4


Training Epoch 2: 100%|██████████| 29/29 [00:14<00:00,  2.01it/s, loss=0.302]


Epoch 2 Loss: 9.5991
Epoch 3/4


Training Epoch 3: 100%|██████████| 29/29 [00:14<00:00,  2.02it/s, loss=0.178]


Epoch 3 Loss: 7.1273
Epoch 4/4


Training Epoch 4: 100%|██████████| 29/29 [00:14<00:00,  2.07it/s, loss=0.188] 


Epoch 4 Loss: 5.7711
Test Metrics: Precision=0.9200, Recall=0.9200, F1=0.9200

Fine-tuning roberta-large (large) with Train Size 230, Split 5...


Map: 100%|██████████| 230/230 [00:00<00:00, 8430.76 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6957.48 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9712.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 29/29 [00:14<00:00,  1.95it/s, loss=0.419]


Epoch 1 Loss: 21.0435
Epoch 2/4


Training Epoch 2: 100%|██████████| 29/29 [00:15<00:00,  1.93it/s, loss=0.229]


Epoch 2 Loss: 8.6151
Epoch 3/4


Training Epoch 3: 100%|██████████| 29/29 [00:14<00:00,  1.96it/s, loss=0.222]


Epoch 3 Loss: 6.3708
Epoch 4/4


Training Epoch 4: 100%|██████████| 29/29 [00:15<00:00,  1.86it/s, loss=0.105] 


Epoch 4 Loss: 4.6083
Test Metrics: Precision=0.9271, Recall=0.9271, F1=0.9271

Fine-tuning roberta-large (large) with Train Size 235, Split 1...


Map: 100%|██████████| 235/235 [00:00<00:00, 8586.95 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6393.34 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9591.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.19it/s, loss=0.498]


Epoch 1 Loss: 22.1261
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:13<00:00,  2.23it/s, loss=0.42] 


Epoch 2 Loss: 10.1006
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.18it/s, loss=0.175]


Epoch 3 Loss: 6.9448
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.17it/s, loss=0.185] 


Epoch 4 Loss: 5.1362
Test Metrics: Precision=0.9243, Recall=0.9243, F1=0.9243

Fine-tuning roberta-large (large) with Train Size 235, Split 2...


Map: 100%|██████████| 235/235 [00:00<00:00, 8759.88 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6145.79 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9372.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s, loss=0.7]  


Epoch 1 Loss: 20.1078
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s, loss=0.31] 


Epoch 2 Loss: 10.4120
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.07it/s, loss=0.278]


Epoch 3 Loss: 7.4679
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.16it/s, loss=0.0684]


Epoch 4 Loss: 5.8451
Test Metrics: Precision=0.9213, Recall=0.9213, F1=0.9213

Fine-tuning roberta-large (large) with Train Size 235, Split 3...


Map: 100%|██████████| 235/235 [00:00<00:00, 8569.78 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6385.68 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9448.37 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s, loss=0.349]


Epoch 1 Loss: 19.9471
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.00it/s, loss=0.437]


Epoch 2 Loss: 8.8872
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.08it/s, loss=0.108]


Epoch 3 Loss: 6.0900
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.02it/s, loss=0.34]  


Epoch 4 Loss: 4.8306
Test Metrics: Precision=0.9255, Recall=0.9255, F1=0.9255

Fine-tuning roberta-large (large) with Train Size 235, Split 4...


Map: 100%|██████████| 235/235 [00:00<00:00, 8616.52 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6553.16 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9375.87 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.04it/s, loss=0.394]


Epoch 1 Loss: 18.6685
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.10it/s, loss=0.158]


Epoch 2 Loss: 8.7430
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.01it/s, loss=0.181]


Epoch 3 Loss: 6.1439
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s, loss=0.144] 


Epoch 4 Loss: 4.3838
Test Metrics: Precision=0.9237, Recall=0.9237, F1=0.9237

Fine-tuning roberta-large (large) with Train Size 235, Split 5...


Map: 100%|██████████| 235/235 [00:00<00:00, 8275.50 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6139.09 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9601.00 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:15<00:00,  1.93it/s, loss=0.334]


Epoch 1 Loss: 20.3138
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:15<00:00,  1.99it/s, loss=0.271]


Epoch 2 Loss: 8.7090
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:15<00:00,  1.96it/s, loss=0.453]


Epoch 3 Loss: 6.4535
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:15<00:00,  1.99it/s, loss=0.124] 


Epoch 4 Loss: 4.5384
Test Metrics: Precision=0.9283, Recall=0.9283, F1=0.9283

Fine-tuning roberta-large (large) with Train Size 240, Split 1...


Map: 100%|██████████| 240/240 [00:00<00:00, 8775.38 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6887.91 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9576.45 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.14it/s, loss=0.499]


Epoch 1 Loss: 18.5754
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s, loss=0.228]


Epoch 2 Loss: 7.7865
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.14it/s, loss=0.125]


Epoch 3 Loss: 5.1842
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.14it/s, loss=0.198] 


Epoch 4 Loss: 3.9005
Test Metrics: Precision=0.9265, Recall=0.9265, F1=0.9265

Fine-tuning roberta-large (large) with Train Size 240, Split 2...


Map: 100%|██████████| 240/240 [00:00<00:00, 8467.57 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5717.39 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9492.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.07it/s, loss=0.508]


Epoch 1 Loss: 19.9972
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.04it/s, loss=0.268]


Epoch 2 Loss: 9.9490
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s, loss=0.222] 


Epoch 3 Loss: 6.6022
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.08it/s, loss=0.169] 


Epoch 4 Loss: 5.1501
Test Metrics: Precision=0.9244, Recall=0.9244, F1=0.9244

Fine-tuning roberta-large (large) with Train Size 240, Split 3...


Map: 100%|██████████| 240/240 [00:00<00:00, 8516.14 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5055.66 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9847.18 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.00it/s, loss=0.352]


Epoch 1 Loss: 23.5331
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.07it/s, loss=0.386]


Epoch 2 Loss: 9.6900
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.04it/s, loss=0.22] 


Epoch 3 Loss: 6.7747
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s, loss=0.0993]


Epoch 4 Loss: 5.1071
Test Metrics: Precision=0.9251, Recall=0.9251, F1=0.9251

Fine-tuning roberta-large (large) with Train Size 240, Split 4...


Map: 100%|██████████| 240/240 [00:00<00:00, 8562.64 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5837.75 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9106.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.05it/s, loss=0.36] 


Epoch 1 Loss: 18.5935
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.06it/s, loss=0.321]


Epoch 2 Loss: 9.1743
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.01it/s, loss=0.197]


Epoch 3 Loss: 6.3395
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.02it/s, loss=0.138] 


Epoch 4 Loss: 4.7071
Test Metrics: Precision=0.9222, Recall=0.9222, F1=0.9222

Fine-tuning roberta-large (large) with Train Size 240, Split 5...


Map: 100%|██████████| 240/240 [00:00<00:00, 8430.90 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6472.07 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9557.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 30/30 [00:16<00:00,  1.87it/s, loss=0.398]


Epoch 1 Loss: 21.6783
Epoch 2/4


Training Epoch 2: 100%|██████████| 30/30 [00:16<00:00,  1.85it/s, loss=0.217]


Epoch 2 Loss: 9.5087
Epoch 3/4


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.00it/s, loss=0.423] 


Epoch 3 Loss: 6.5644
Epoch 4/4


Training Epoch 4: 100%|██████████| 30/30 [00:15<00:00,  1.89it/s, loss=0.163] 


Epoch 4 Loss: 4.7057
Test Metrics: Precision=0.9287, Recall=0.9287, F1=0.9287

Fine-tuning roberta-large (large) with Train Size 245, Split 1...


Map: 100%|██████████| 245/245 [00:00<00:00, 8560.37 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6264.54 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9416.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 31/31 [00:13<00:00,  2.27it/s, loss=0.349]


Epoch 1 Loss: 19.8038
Epoch 2/4


Training Epoch 2: 100%|██████████| 31/31 [00:13<00:00,  2.23it/s, loss=0.5]  


Epoch 2 Loss: 9.1451
Epoch 3/4


Training Epoch 3: 100%|██████████| 31/31 [00:14<00:00,  2.15it/s, loss=0.172]


Epoch 3 Loss: 6.8509
Epoch 4/4


Training Epoch 4: 100%|██████████| 31/31 [00:13<00:00,  2.21it/s, loss=0.392]


Epoch 4 Loss: 5.9377
Test Metrics: Precision=0.9192, Recall=0.9192, F1=0.9192

Fine-tuning roberta-large (large) with Train Size 245, Split 2...


Map: 100%|██████████| 245/245 [00:00<00:00, 8498.08 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 5765.29 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9592.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 31/31 [00:14<00:00,  2.08it/s, loss=0.363]


Epoch 1 Loss: 21.2771
Epoch 2/4


Training Epoch 2: 100%|██████████| 31/31 [00:15<00:00,  2.02it/s, loss=0.543]


Epoch 2 Loss: 10.8214
Epoch 3/4


Training Epoch 3: 100%|██████████| 31/31 [00:14<00:00,  2.14it/s, loss=0.26] 


Epoch 3 Loss: 7.9346
Epoch 4/4


Training Epoch 4: 100%|██████████| 31/31 [00:14<00:00,  2.12it/s, loss=0.172] 


Epoch 4 Loss: 6.1403
Test Metrics: Precision=0.9255, Recall=0.9255, F1=0.9255

Fine-tuning roberta-large (large) with Train Size 245, Split 3...


Map: 100%|██████████| 245/245 [00:00<00:00, 8501.80 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 5856.30 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9541.51 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 31/31 [00:15<00:00,  2.01it/s, loss=0.302]


Epoch 1 Loss: 20.8881
Epoch 2/4


Training Epoch 2: 100%|██████████| 31/31 [00:15<00:00,  2.01it/s, loss=0.115]


Epoch 2 Loss: 9.4028
Epoch 3/4


Training Epoch 3: 100%|██████████| 31/31 [00:14<00:00,  2.08it/s, loss=0.341]


Epoch 3 Loss: 5.9755
Epoch 4/4


Training Epoch 4: 100%|██████████| 31/31 [00:15<00:00,  2.02it/s, loss=0.146] 


Epoch 4 Loss: 4.3427
Test Metrics: Precision=0.9254, Recall=0.9254, F1=0.9254

Fine-tuning roberta-large (large) with Train Size 245, Split 4...


Map: 100%|██████████| 245/245 [00:00<00:00, 8532.51 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6094.93 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9170.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 31/31 [00:15<00:00,  1.99it/s, loss=0.549]


Epoch 1 Loss: 20.6033
Epoch 2/4


Training Epoch 2: 100%|██████████| 31/31 [00:14<00:00,  2.07it/s, loss=0.286]


Epoch 2 Loss: 9.8133
Epoch 3/4


Training Epoch 3: 100%|██████████| 31/31 [00:14<00:00,  2.07it/s, loss=0.577]


Epoch 3 Loss: 7.0665
Epoch 4/4


Training Epoch 4: 100%|██████████| 31/31 [00:15<00:00,  2.05it/s, loss=0.11]  


Epoch 4 Loss: 5.4310
Test Metrics: Precision=0.9206, Recall=0.9206, F1=0.9206

Fine-tuning roberta-large (large) with Train Size 245, Split 5...


Map: 100%|██████████| 245/245 [00:00<00:00, 8660.15 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6444.88 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9841.38 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 31/31 [00:15<00:00,  2.05it/s, loss=0.79] 


Epoch 1 Loss: 21.2890
Epoch 2/4


Training Epoch 2: 100%|██████████| 31/31 [00:16<00:00,  1.94it/s, loss=0.169]


Epoch 2 Loss: 10.0579
Epoch 3/4


Training Epoch 3: 100%|██████████| 31/31 [00:17<00:00,  1.80it/s, loss=0.102]


Epoch 3 Loss: 6.8550
Epoch 4/4


Training Epoch 4: 100%|██████████| 31/31 [00:16<00:00,  1.91it/s, loss=0.146] 


Epoch 4 Loss: 5.5020
Test Metrics: Precision=0.9257, Recall=0.9257, F1=0.9257

Fine-tuning roberta-large (large) with Train Size 250, Split 1...


Map: 100%|██████████| 250/250 [00:00<00:00, 8717.07 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6823.33 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9478.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.22it/s, loss=0.28] 


Epoch 1 Loss: 20.5523
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.24it/s, loss=0.255]


Epoch 2 Loss: 8.8245
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.24it/s, loss=0.0934]


Epoch 3 Loss: 5.5945
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.22it/s, loss=0.12]  


Epoch 4 Loss: 4.0216
Test Metrics: Precision=0.9270, Recall=0.9270, F1=0.9270

Fine-tuning roberta-large (large) with Train Size 250, Split 2...


Map: 100%|██████████| 250/250 [00:00<00:00, 9006.91 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6867.35 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9860.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.10it/s, loss=0.452]


Epoch 1 Loss: 20.7303
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.11it/s, loss=0.36] 


Epoch 2 Loss: 10.1378
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.11it/s, loss=0.278] 


Epoch 3 Loss: 7.4081
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.12it/s, loss=0.0742]


Epoch 4 Loss: 5.3393
Test Metrics: Precision=0.9271, Recall=0.9271, F1=0.9271

Fine-tuning roberta-large (large) with Train Size 250, Split 3...


Map: 100%|██████████| 250/250 [00:00<00:00, 1665.55 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6865.55 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9577.47 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.05it/s, loss=0.387]


Epoch 1 Loss: 19.0284
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.18it/s, loss=0.261]


Epoch 2 Loss: 8.8851
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.07it/s, loss=0.174]


Epoch 3 Loss: 6.2143
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.15it/s, loss=0.0188]


Epoch 4 Loss: 4.4198
Test Metrics: Precision=0.9264, Recall=0.9264, F1=0.9264

Fine-tuning roberta-large (large) with Train Size 250, Split 4...


Map: 100%|██████████| 250/250 [00:00<00:00, 8530.56 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6852.09 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9273.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.11it/s, loss=0.406]


Epoch 1 Loss: 23.5455
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.02it/s, loss=0.131]


Epoch 2 Loss: 9.3530
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.08it/s, loss=0.21] 


Epoch 3 Loss: 6.4025
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.12it/s, loss=0.159] 


Epoch 4 Loss: 4.9099
Test Metrics: Precision=0.9243, Recall=0.9243, F1=0.9243

Fine-tuning roberta-large (large) with Train Size 250, Split 5...


Map: 100%|██████████| 250/250 [00:00<00:00, 8495.65 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6334.28 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9484.26 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:16<00:00,  1.94it/s, loss=0.485]


Epoch 1 Loss: 21.1574
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:16<00:00,  1.92it/s, loss=0.388]


Epoch 2 Loss: 10.3883
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:16<00:00,  1.95it/s, loss=0.337]


Epoch 3 Loss: 7.2573
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:16<00:00,  1.94it/s, loss=0.348]


Epoch 4 Loss: 5.5437
Test Metrics: Precision=0.9284, Recall=0.9284, F1=0.9284

Fine-tuning roberta-large (large) with Train Size 255, Split 1...


Map: 100%|██████████| 255/255 [00:00<00:00, 8372.59 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6476.41 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9300.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.14it/s, loss=0.31] 


Epoch 1 Loss: 19.0105
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.24it/s, loss=0.267]


Epoch 2 Loss: 8.5978
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.21it/s, loss=0.227] 


Epoch 3 Loss: 5.5710
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.16it/s, loss=0.248] 


Epoch 4 Loss: 3.8954
Test Metrics: Precision=0.9283, Recall=0.9283, F1=0.9283

Fine-tuning roberta-large (large) with Train Size 255, Split 2...


Map: 100%|██████████| 255/255 [00:00<00:00, 8568.51 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6914.36 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9361.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.12it/s, loss=0.515]


Epoch 1 Loss: 22.4079
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.04it/s, loss=0.251]


Epoch 2 Loss: 9.0287
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.01it/s, loss=0.283]


Epoch 3 Loss: 6.4587
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.09it/s, loss=0.0823]


Epoch 4 Loss: 4.3724
Test Metrics: Precision=0.9284, Recall=0.9284, F1=0.9284

Fine-tuning roberta-large (large) with Train Size 255, Split 3...


Map: 100%|██████████| 255/255 [00:00<00:00, 8504.94 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 5926.29 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9756.34 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.05it/s, loss=0.26] 


Epoch 1 Loss: 19.9595
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.08it/s, loss=0.146]


Epoch 2 Loss: 10.1751
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.15it/s, loss=0.259]


Epoch 3 Loss: 7.1850
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.08it/s, loss=0.205] 


Epoch 4 Loss: 5.6459
Test Metrics: Precision=0.9248, Recall=0.9248, F1=0.9248

Fine-tuning roberta-large (large) with Train Size 255, Split 4...


Map: 100%|██████████| 255/255 [00:00<00:00, 7645.25 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6479.16 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9608.73 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:16<00:00,  2.00it/s, loss=0.448]


Epoch 1 Loss: 18.8800
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:16<00:00,  1.99it/s, loss=0.195]


Epoch 2 Loss: 8.8402
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.06it/s, loss=0.11]  


Epoch 3 Loss: 5.9538
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.01it/s, loss=0.0931]


Epoch 4 Loss: 4.7068
Test Metrics: Precision=0.9247, Recall=0.9247, F1=0.9247

Fine-tuning roberta-large (large) with Train Size 255, Split 5...


Map: 100%|██████████| 255/255 [00:00<00:00, 8455.92 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 7018.03 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9511.15 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 32/32 [00:16<00:00,  1.89it/s, loss=0.249]


Epoch 1 Loss: 20.1630
Epoch 2/4


Training Epoch 2: 100%|██████████| 32/32 [00:16<00:00,  1.89it/s, loss=0.147]


Epoch 2 Loss: 10.3563
Epoch 3/4


Training Epoch 3: 100%|██████████| 32/32 [00:16<00:00,  1.94it/s, loss=0.157]


Epoch 3 Loss: 7.0647
Epoch 4/4


Training Epoch 4: 100%|██████████| 32/32 [00:17<00:00,  1.86it/s, loss=0.181]


Epoch 4 Loss: 5.2350
Test Metrics: Precision=0.9295, Recall=0.9295, F1=0.9295

Fine-tuning roberta-large (large) with Train Size 260, Split 1...


Map: 100%|██████████| 260/260 [00:00<00:00, 8575.35 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6426.92 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9609.18 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 33/33 [00:15<00:00,  2.18it/s, loss=0.425]


Epoch 1 Loss: 19.9363
Epoch 2/4


Training Epoch 2: 100%|██████████| 33/33 [00:14<00:00,  2.21it/s, loss=0.173]


Epoch 2 Loss: 9.0395
Epoch 3/4


Training Epoch 3: 100%|██████████| 33/33 [00:14<00:00,  2.24it/s, loss=0.182] 


Epoch 3 Loss: 6.4619
Epoch 4/4


Training Epoch 4: 100%|██████████| 33/33 [00:14<00:00,  2.23it/s, loss=0.117] 


Epoch 4 Loss: 4.7953
Test Metrics: Precision=0.9268, Recall=0.9268, F1=0.9268

Fine-tuning roberta-large (large) with Train Size 260, Split 2...


Map: 100%|██████████| 260/260 [00:00<00:00, 8480.06 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 5997.96 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9314.22 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 33/33 [00:15<00:00,  2.15it/s, loss=0.39] 


Epoch 1 Loss: 24.1132
Epoch 2/4


Training Epoch 2: 100%|██████████| 33/33 [00:15<00:00,  2.18it/s, loss=0.305]


Epoch 2 Loss: 11.7619
Epoch 3/4


Training Epoch 3: 100%|██████████| 33/33 [00:15<00:00,  2.12it/s, loss=0.19] 


Epoch 3 Loss: 8.6595
Epoch 4/4


Training Epoch 4: 100%|██████████| 33/33 [00:15<00:00,  2.14it/s, loss=0.208]


Epoch 4 Loss: 7.0686
Test Metrics: Precision=0.9239, Recall=0.9239, F1=0.9239

Fine-tuning roberta-large (large) with Train Size 260, Split 3...


Map: 100%|██████████| 260/260 [00:00<00:00, 8204.76 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6534.75 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 8977.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 33/33 [00:15<00:00,  2.12it/s, loss=0.514]


Epoch 1 Loss: 18.7551
Epoch 2/4


Training Epoch 2: 100%|██████████| 33/33 [00:15<00:00,  2.12it/s, loss=0.141]


Epoch 2 Loss: 9.0836
Epoch 3/4


Training Epoch 3: 100%|██████████| 33/33 [00:15<00:00,  2.18it/s, loss=0.155]


Epoch 3 Loss: 8.7494
Epoch 4/4


Training Epoch 4: 100%|██████████| 33/33 [00:15<00:00,  2.08it/s, loss=0.185]


Epoch 4 Loss: 7.2181
Test Metrics: Precision=0.9085, Recall=0.9085, F1=0.9085

Fine-tuning roberta-large (large) with Train Size 260, Split 4...


Map: 100%|██████████| 260/260 [00:00<00:00, 8863.41 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6670.25 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9678.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 33/33 [00:16<00:00,  2.04it/s, loss=0.511]


Epoch 1 Loss: 19.8083
Epoch 2/4


Training Epoch 2: 100%|██████████| 33/33 [00:15<00:00,  2.07it/s, loss=0.232]


Epoch 2 Loss: 9.0707
Epoch 3/4


Training Epoch 3: 100%|██████████| 33/33 [00:15<00:00,  2.14it/s, loss=0.114]


Epoch 3 Loss: 6.0101
Epoch 4/4


Training Epoch 4: 100%|██████████| 33/33 [00:15<00:00,  2.10it/s, loss=0.106] 


Epoch 4 Loss: 4.5965
Test Metrics: Precision=0.9247, Recall=0.9247, F1=0.9247

Fine-tuning roberta-large (large) with Train Size 260, Split 5...


Map: 100%|██████████| 260/260 [00:00<00:00, 8693.07 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6460.42 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9495.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 33/33 [00:16<00:00,  2.06it/s, loss=0.473]


Epoch 1 Loss: 24.0830
Epoch 2/4


Training Epoch 2: 100%|██████████| 33/33 [00:17<00:00,  1.92it/s, loss=0.286]


Epoch 2 Loss: 12.5676
Epoch 3/4


Training Epoch 3: 100%|██████████| 33/33 [00:17<00:00,  1.94it/s, loss=0.582]


Epoch 3 Loss: 9.7814
Epoch 4/4


Training Epoch 4: 100%|██████████| 33/33 [00:16<00:00,  1.99it/s, loss=0.275]


Epoch 4 Loss: 7.4149
Test Metrics: Precision=0.9126, Recall=0.9126, F1=0.9126

Fine-tuning roberta-large (large) with Train Size 265, Split 1...


Map: 100%|██████████| 265/265 [00:00<00:00, 8651.95 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6452.21 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9516.51 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:14<00:00,  2.27it/s, loss=0.302]


Epoch 1 Loss: 21.4514
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.25it/s, loss=0.122]


Epoch 2 Loss: 9.2424
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.23it/s, loss=0.263] 


Epoch 3 Loss: 6.2803
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:15<00:00,  2.24it/s, loss=0.248] 


Epoch 4 Loss: 4.6598
Test Metrics: Precision=0.9265, Recall=0.9265, F1=0.9265

Fine-tuning roberta-large (large) with Train Size 265, Split 2...


Map: 100%|██████████| 265/265 [00:00<00:00, 8491.79 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6792.08 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9480.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.18it/s, loss=0.0397]


Epoch 1 Loss: 21.1293
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.21it/s, loss=0.429]


Epoch 2 Loss: 9.4283
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.17it/s, loss=0.25] 


Epoch 3 Loss: 6.5608
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.0353]


Epoch 4 Loss: 4.6154
Test Metrics: Precision=0.9270, Recall=0.9270, F1=0.9270

Fine-tuning roberta-large (large) with Train Size 265, Split 3...


Map: 100%|██████████| 265/265 [00:00<00:00, 8294.64 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6608.54 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 4105.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.00811]


Epoch 1 Loss: 20.5057
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.15it/s, loss=0.0984]


Epoch 2 Loss: 9.1916
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.07it/s, loss=0.225] 


Epoch 3 Loss: 6.2687
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.166] 


Epoch 4 Loss: 4.4993
Test Metrics: Precision=0.9292, Recall=0.9292, F1=0.9292

Fine-tuning roberta-large (large) with Train Size 265, Split 4...


Map: 100%|██████████| 265/265 [00:00<00:00, 8867.03 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 5630.37 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9405.33 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:16<00:00,  2.11it/s, loss=0.751]


Epoch 1 Loss: 21.3249
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:16<00:00,  2.09it/s, loss=0.218]


Epoch 2 Loss: 11.1850
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.05it/s, loss=0.102]


Epoch 3 Loss: 7.5189
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.08it/s, loss=0.145] 


Epoch 4 Loss: 5.7354
Test Metrics: Precision=0.9237, Recall=0.9237, F1=0.9237

Fine-tuning roberta-large (large) with Train Size 265, Split 5...


Map: 100%|██████████| 265/265 [00:00<00:00, 8379.19 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6967.06 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9548.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:17<00:00,  1.96it/s, loss=0.528]


Epoch 1 Loss: 22.0294
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:17<00:00,  1.95it/s, loss=0.27] 


Epoch 2 Loss: 12.3291
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.10it/s, loss=0.219]


Epoch 3 Loss: 8.4496
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.01it/s, loss=0.119]


Epoch 4 Loss: 6.3007
Test Metrics: Precision=0.9244, Recall=0.9244, F1=0.9244

Fine-tuning roberta-large (large) with Train Size 270, Split 1...


Map: 100%|██████████| 270/270 [00:00<00:00, 8428.69 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 5451.34 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9632.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.17it/s, loss=0.287]


Epoch 1 Loss: 21.1883
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.21it/s, loss=0.243]


Epoch 2 Loss: 8.8082
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.23it/s, loss=0.217]


Epoch 3 Loss: 5.8049
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:14<00:00,  2.34it/s, loss=0.174] 


Epoch 4 Loss: 4.3031
Test Metrics: Precision=0.9253, Recall=0.9253, F1=0.9253

Fine-tuning roberta-large (large) with Train Size 270, Split 2...


Map: 100%|██████████| 270/270 [00:00<00:00, 8570.30 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6563.10 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9531.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.17it/s, loss=0.317]


Epoch 1 Loss: 20.8125
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.18it/s, loss=0.265]


Epoch 2 Loss: 9.9952
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.139]


Epoch 3 Loss: 7.0581
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:15<00:00,  2.21it/s, loss=0.211] 


Epoch 4 Loss: 5.1422
Test Metrics: Precision=0.9273, Recall=0.9273, F1=0.9273

Fine-tuning roberta-large (large) with Train Size 270, Split 3...


Map: 100%|██████████| 270/270 [00:00<00:00, 8733.28 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 7206.94 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9111.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.15it/s, loss=0.372]


Epoch 1 Loss: 21.6780
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:16<00:00,  2.09it/s, loss=0.334]


Epoch 2 Loss: 11.0196
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.11it/s, loss=0.331]


Epoch 3 Loss: 7.1751
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.178] 


Epoch 4 Loss: 5.3733
Test Metrics: Precision=0.9269, Recall=0.9269, F1=0.9269

Fine-tuning roberta-large (large) with Train Size 270, Split 4...


Map: 100%|██████████| 270/270 [00:00<00:00, 8116.26 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6461.43 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9139.08 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:17<00:00,  1.99it/s, loss=0.318]


Epoch 1 Loss: 20.8864
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:16<00:00,  2.00it/s, loss=0.34] 


Epoch 2 Loss: 9.2760
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.14it/s, loss=0.124]


Epoch 3 Loss: 6.4526
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.07it/s, loss=0.119] 


Epoch 4 Loss: 4.7550
Test Metrics: Precision=0.9277, Recall=0.9277, F1=0.9277

Fine-tuning roberta-large (large) with Train Size 270, Split 5...


Map: 100%|██████████| 270/270 [00:00<00:00, 8425.87 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6058.54 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9492.82 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 34/34 [00:17<00:00,  1.96it/s, loss=0.275]


Epoch 1 Loss: 21.8119
Epoch 2/4


Training Epoch 2: 100%|██████████| 34/34 [00:17<00:00,  1.97it/s, loss=0.206]


Epoch 2 Loss: 11.3248
Epoch 3/4


Training Epoch 3: 100%|██████████| 34/34 [00:17<00:00,  1.92it/s, loss=0.353] 


Epoch 3 Loss: 8.3223
Epoch 4/4


Training Epoch 4: 100%|██████████| 34/34 [00:17<00:00,  1.90it/s, loss=0.214] 


Epoch 4 Loss: 6.1728
Test Metrics: Precision=0.9274, Recall=0.9274, F1=0.9274

Fine-tuning roberta-large (large) with Train Size 275, Split 1...


Map: 100%|██████████| 275/275 [00:00<00:00, 8273.92 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6739.31 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9194.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:15<00:00,  2.27it/s, loss=0.434]


Epoch 1 Loss: 18.0104
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.27it/s, loss=0.0784]


Epoch 2 Loss: 8.4791
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:14<00:00,  2.35it/s, loss=0.274] 


Epoch 3 Loss: 5.7364
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:14<00:00,  2.37it/s, loss=0.0967]


Epoch 4 Loss: 3.9159
Test Metrics: Precision=0.9283, Recall=0.9283, F1=0.9283

Fine-tuning roberta-large (large) with Train Size 275, Split 2...


Map: 100%|██████████| 275/275 [00:00<00:00, 8474.21 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6149.35 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9528.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.17it/s, loss=0.349]


Epoch 1 Loss: 20.2879
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.30it/s, loss=0.233]


Epoch 2 Loss: 9.9535
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, loss=0.28]  


Epoch 3 Loss: 6.5624
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, loss=0.191] 


Epoch 4 Loss: 5.0362
Test Metrics: Precision=0.9301, Recall=0.9301, F1=0.9301

Fine-tuning roberta-large (large) with Train Size 275, Split 3...


Map: 100%|██████████| 275/275 [00:00<00:00, 8800.54 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6846.73 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9384.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, loss=0.256]


Epoch 1 Loss: 20.5696
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.09it/s, loss=0.198]


Epoch 2 Loss: 9.7105
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:15<00:00,  2.24it/s, loss=0.192] 


Epoch 3 Loss: 6.6354
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.16it/s, loss=0.198] 


Epoch 4 Loss: 4.7492
Test Metrics: Precision=0.9273, Recall=0.9273, F1=0.9273

Fine-tuning roberta-large (large) with Train Size 275, Split 4...


Map: 100%|██████████| 275/275 [00:00<00:00, 8254.32 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 7966.53 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9202.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.09it/s, loss=0.514]


Epoch 1 Loss: 20.7032
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.14it/s, loss=0.231]


Epoch 2 Loss: 8.6590
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.12it/s, loss=0.214]


Epoch 3 Loss: 5.5700
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.13it/s, loss=0.0549]


Epoch 4 Loss: 4.1691
Test Metrics: Precision=0.9295, Recall=0.9295, F1=0.9295

Fine-tuning roberta-large (large) with Train Size 275, Split 5...


Map: 100%|██████████| 275/275 [00:00<00:00, 8311.90 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6038.45 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9652.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:17<00:00,  1.97it/s, loss=0.323]


Epoch 1 Loss: 22.3138
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:17<00:00,  2.00it/s, loss=0.403]


Epoch 2 Loss: 11.2565
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:17<00:00,  1.97it/s, loss=0.146]


Epoch 3 Loss: 7.9521
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:17<00:00,  1.99it/s, loss=0.134] 


Epoch 4 Loss: 6.3382
Test Metrics: Precision=0.9276, Recall=0.9276, F1=0.9276

Fine-tuning roberta-large (large) with Train Size 280, Split 1...


Map: 100%|██████████| 280/280 [00:00<00:00, 8883.62 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7158.82 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9552.84 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:15<00:00,  2.23it/s, loss=0.512]


Epoch 1 Loss: 21.1759
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.26it/s, loss=0.162]


Epoch 2 Loss: 9.5067
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.14it/s, loss=0.139]


Epoch 3 Loss: 6.5867
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:15<00:00,  2.24it/s, loss=0.121] 


Epoch 4 Loss: 4.7814
Test Metrics: Precision=0.9269, Recall=0.9269, F1=0.9269

Fine-tuning roberta-large (large) with Train Size 280, Split 2...


Map: 100%|██████████| 280/280 [00:00<00:00, 8691.57 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6746.16 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9632.27 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.13it/s, loss=0.312]


Epoch 1 Loss: 21.0616
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.19it/s, loss=0.294]


Epoch 2 Loss: 9.7897
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.11it/s, loss=0.147] 


Epoch 3 Loss: 6.4911
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.15it/s, loss=0.136] 


Epoch 4 Loss: 4.8376
Test Metrics: Precision=0.9262, Recall=0.9262, F1=0.9262

Fine-tuning roberta-large (large) with Train Size 280, Split 3...


Map: 100%|██████████| 280/280 [00:00<00:00, 9206.40 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 5751.81 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9660.78 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.10it/s, loss=0.547]


Epoch 1 Loss: 20.5519
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.13it/s, loss=0.134]


Epoch 2 Loss: 9.7245
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.16it/s, loss=0.215] 


Epoch 3 Loss: 6.2609
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.17it/s, loss=0.115] 


Epoch 4 Loss: 4.6606
Test Metrics: Precision=0.9283, Recall=0.9283, F1=0.9283

Fine-tuning roberta-large (large) with Train Size 280, Split 4...


Map: 100%|██████████| 280/280 [00:00<00:00, 8748.09 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6428.58 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9656.22 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:17<00:00,  2.05it/s, loss=0.271]


Epoch 1 Loss: 24.0976
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.09it/s, loss=0.195]


Epoch 2 Loss: 9.8600
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:17<00:00,  2.04it/s, loss=0.251] 


Epoch 3 Loss: 6.9682
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.06it/s, loss=0.175] 


Epoch 4 Loss: 5.2513
Test Metrics: Precision=0.9248, Recall=0.9248, F1=0.9248

Fine-tuning roberta-large (large) with Train Size 280, Split 5...


Map: 100%|██████████| 280/280 [00:00<00:00, 8609.44 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6366.72 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9445.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 35/35 [00:17<00:00,  2.02it/s, loss=0.358]


Epoch 1 Loss: 20.0264
Epoch 2/4


Training Epoch 2: 100%|██████████| 35/35 [00:18<00:00,  1.94it/s, loss=0.263]


Epoch 2 Loss: 11.5920
Epoch 3/4


Training Epoch 3: 100%|██████████| 35/35 [00:17<00:00,  1.95it/s, loss=0.217]


Epoch 3 Loss: 11.2421
Epoch 4/4


Training Epoch 4: 100%|██████████| 35/35 [00:18<00:00,  1.85it/s, loss=0.24] 


Epoch 4 Loss: 8.6473
Test Metrics: Precision=0.9152, Recall=0.9152, F1=0.9152

Fine-tuning roberta-large (large) with Train Size 285, Split 1...


Map: 100%|██████████| 285/285 [00:00<00:00, 8529.87 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 7707.63 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9438.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 36/36 [00:16<00:00,  2.23it/s, loss=0.356]


Epoch 1 Loss: 21.9975
Epoch 2/4


Training Epoch 2: 100%|██████████| 36/36 [00:16<00:00,  2.17it/s, loss=0.16] 


Epoch 2 Loss: 10.6446
Epoch 3/4


Training Epoch 3: 100%|██████████| 36/36 [00:15<00:00,  2.26it/s, loss=0.251]


Epoch 3 Loss: 7.1828
Epoch 4/4


Training Epoch 4: 100%|██████████| 36/36 [00:16<00:00,  2.15it/s, loss=0.129] 


Epoch 4 Loss: 5.5756
Test Metrics: Precision=0.9275, Recall=0.9275, F1=0.9275

Fine-tuning roberta-large (large) with Train Size 285, Split 2...


Map: 100%|██████████| 285/285 [00:00<00:00, 8893.58 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6418.65 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9560.11 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 36/36 [00:16<00:00,  2.17it/s, loss=0.317]


Epoch 1 Loss: 20.9437
Epoch 2/4


Training Epoch 2: 100%|██████████| 36/36 [00:17<00:00,  2.11it/s, loss=0.297]


Epoch 2 Loss: 9.1476
Epoch 3/4


Training Epoch 3: 100%|██████████| 36/36 [00:16<00:00,  2.21it/s, loss=0.169] 


Epoch 3 Loss: 6.3996
Epoch 4/4


Training Epoch 4: 100%|██████████| 36/36 [00:16<00:00,  2.17it/s, loss=0.234] 


Epoch 4 Loss: 4.6664
Test Metrics: Precision=0.9290, Recall=0.9290, F1=0.9290

Fine-tuning roberta-large (large) with Train Size 285, Split 3...


Map: 100%|██████████| 285/285 [00:00<00:00, 8689.98 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 4882.97 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 4129.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 36/36 [00:16<00:00,  2.17it/s, loss=0.39] 


Epoch 1 Loss: 21.1807
Epoch 2/4


Training Epoch 2: 100%|██████████| 36/36 [00:16<00:00,  2.16it/s, loss=0.13] 


Epoch 2 Loss: 9.9518
Epoch 3/4


Training Epoch 3: 100%|██████████| 36/36 [00:16<00:00,  2.16it/s, loss=0.085] 


Epoch 3 Loss: 6.3224
Epoch 4/4


Training Epoch 4: 100%|██████████| 36/36 [00:16<00:00,  2.21it/s, loss=0.0903]


Epoch 4 Loss: 4.4874
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 285, Split 4...


Map: 100%|██████████| 285/285 [00:00<00:00, 8805.01 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6483.93 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9476.43 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 36/36 [00:17<00:00,  2.06it/s, loss=0.3]  


Epoch 1 Loss: 22.0585
Epoch 2/4


Training Epoch 2: 100%|██████████| 36/36 [00:17<00:00,  2.11it/s, loss=0.175]


Epoch 2 Loss: 10.3799
Epoch 3/4


Training Epoch 3: 100%|██████████| 36/36 [00:17<00:00,  2.09it/s, loss=0.304]


Epoch 3 Loss: 7.2925
Epoch 4/4


Training Epoch 4: 100%|██████████| 36/36 [00:16<00:00,  2.12it/s, loss=0.319] 


Epoch 4 Loss: 6.1697
Test Metrics: Precision=0.9260, Recall=0.9260, F1=0.9260

Fine-tuning roberta-large (large) with Train Size 285, Split 5...


Map: 100%|██████████| 285/285 [00:00<00:00, 8571.59 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6899.52 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9529.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 36/36 [00:18<00:00,  1.99it/s, loss=0.613]


Epoch 1 Loss: 23.3881
Epoch 2/4


Training Epoch 2: 100%|██████████| 36/36 [00:18<00:00,  1.94it/s, loss=0.28] 


Epoch 2 Loss: 10.1177
Epoch 3/4


Training Epoch 3: 100%|██████████| 36/36 [00:17<00:00,  2.05it/s, loss=0.153]


Epoch 3 Loss: 6.8807
Epoch 4/4


Training Epoch 4: 100%|██████████| 36/36 [00:17<00:00,  2.07it/s, loss=0.104] 


Epoch 4 Loss: 5.2507
Test Metrics: Precision=0.9318, Recall=0.9318, F1=0.9318

Fine-tuning roberta-large (large) with Train Size 290, Split 1...


Map: 100%|██████████| 290/290 [00:00<00:00, 8772.35 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6730.94 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9516.67 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:16<00:00,  2.29it/s, loss=0.126]


Epoch 1 Loss: 21.1431
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.29it/s, loss=0.268]


Epoch 2 Loss: 9.6394
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.21it/s, loss=0.186] 


Epoch 3 Loss: 6.4963
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.108] 


Epoch 4 Loss: 4.6612
Test Metrics: Precision=0.9284, Recall=0.9284, F1=0.9284

Fine-tuning roberta-large (large) with Train Size 290, Split 2...


Map: 100%|██████████| 290/290 [00:00<00:00, 8913.07 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6417.37 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9401.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:16<00:00,  2.24it/s, loss=0.753]


Epoch 1 Loss: 22.8883
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.22it/s, loss=0.16] 


Epoch 2 Loss: 10.8474
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.136]


Epoch 3 Loss: 7.2995
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.26it/s, loss=0.125] 


Epoch 4 Loss: 5.2578
Test Metrics: Precision=0.9267, Recall=0.9267, F1=0.9267

Fine-tuning roberta-large (large) with Train Size 290, Split 3...


Map: 100%|██████████| 290/290 [00:00<00:00, 8829.02 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6162.16 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9545.67 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.18it/s, loss=0.0689]


Epoch 1 Loss: 20.6717
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.14it/s, loss=0.17] 


Epoch 2 Loss: 10.7218
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.29it/s, loss=0.372] 


Epoch 3 Loss: 7.7187
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.16it/s, loss=0.0383]


Epoch 4 Loss: 5.5915
Test Metrics: Precision=0.9286, Recall=0.9286, F1=0.9286

Fine-tuning roberta-large (large) with Train Size 290, Split 4...


Map: 100%|██████████| 290/290 [00:00<00:00, 8579.67 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 7286.58 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9244.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.08it/s, loss=0.0874]


Epoch 1 Loss: 20.4424
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.126]


Epoch 2 Loss: 9.9689
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:17<00:00,  2.06it/s, loss=0.0111]


Epoch 3 Loss: 6.7277
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:18<00:00,  2.04it/s, loss=0.12]  


Epoch 4 Loss: 5.0644
Test Metrics: Precision=0.9264, Recall=0.9264, F1=0.9264

Fine-tuning roberta-large (large) with Train Size 290, Split 5...


Map: 100%|██████████| 290/290 [00:00<00:00, 8538.53 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 7830.48 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9494.82 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:18<00:00,  1.97it/s, loss=0.287]


Epoch 1 Loss: 22.1442
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.11it/s, loss=0.062]


Epoch 2 Loss: 9.5971
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:18<00:00,  2.04it/s, loss=0.169] 


Epoch 3 Loss: 6.6855
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.07it/s, loss=0.118] 


Epoch 4 Loss: 4.5760
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning roberta-large (large) with Train Size 295, Split 1...


Map: 100%|██████████| 295/295 [00:00<00:00, 8746.10 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 5705.35 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9651.11 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:16<00:00,  2.24it/s, loss=0.285]


Epoch 1 Loss: 22.3030
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.18it/s, loss=0.199]


Epoch 2 Loss: 11.2021
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.24it/s, loss=0.158] 


Epoch 3 Loss: 8.2823
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.169]


Epoch 4 Loss: 6.6452
Test Metrics: Precision=0.9235, Recall=0.9235, F1=0.9235

Fine-tuning roberta-large (large) with Train Size 295, Split 2...


Map: 100%|██████████| 295/295 [00:00<00:00, 8910.49 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7247.02 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9490.98 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.14it/s, loss=0.285]


Epoch 1 Loss: 21.3343
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.13it/s, loss=0.169]


Epoch 2 Loss: 10.6277
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:17<00:00,  2.17it/s, loss=0.123]


Epoch 3 Loss: 7.3815
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.15it/s, loss=0.0883]


Epoch 4 Loss: 5.2465
Test Metrics: Precision=0.9285, Recall=0.9285, F1=0.9285

Fine-tuning roberta-large (large) with Train Size 295, Split 3...


Map: 100%|██████████| 295/295 [00:00<00:00, 8638.69 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7689.27 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9367.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.18it/s, loss=0.387]


Epoch 1 Loss: 19.6696
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.09it/s, loss=0.138]


Epoch 2 Loss: 9.4714
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:17<00:00,  2.08it/s, loss=0.193] 


Epoch 3 Loss: 6.5189
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.0744]


Epoch 4 Loss: 4.4321
Test Metrics: Precision=0.9314, Recall=0.9314, F1=0.9314

Fine-tuning roberta-large (large) with Train Size 295, Split 4...


Map: 100%|██████████| 295/295 [00:00<00:00, 9165.60 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 6688.58 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9580.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.06it/s, loss=0.383]


Epoch 1 Loss: 20.7787
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.15it/s, loss=0.153]


Epoch 2 Loss: 9.5044
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:17<00:00,  2.06it/s, loss=0.172]


Epoch 3 Loss: 6.5520
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.07it/s, loss=0.0961]


Epoch 4 Loss: 4.8320
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 295, Split 5...


Map: 100%|██████████| 295/295 [00:00<00:00, 8407.08 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7048.05 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9457.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 37/37 [00:18<00:00,  1.99it/s, loss=0.382]


Epoch 1 Loss: 22.8685
Epoch 2/4


Training Epoch 2: 100%|██████████| 37/37 [00:18<00:00,  2.02it/s, loss=0.364]


Epoch 2 Loss: 10.4585
Epoch 3/4


Training Epoch 3: 100%|██████████| 37/37 [00:18<00:00,  2.05it/s, loss=0.169] 


Epoch 3 Loss: 7.1355
Epoch 4/4


Training Epoch 4: 100%|██████████| 37/37 [00:18<00:00,  2.00it/s, loss=0.119] 


Epoch 4 Loss: 5.0948
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 300, Split 1...


Map: 100%|██████████| 300/300 [00:00<00:00, 8479.51 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 5879.45 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9704.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 38/38 [00:16<00:00,  2.26it/s, loss=0.393]


Epoch 1 Loss: 21.7505
Epoch 2/4


Training Epoch 2: 100%|██████████| 38/38 [00:17<00:00,  2.23it/s, loss=0.227]


Epoch 2 Loss: 9.6942
Epoch 3/4


Training Epoch 3: 100%|██████████| 38/38 [00:16<00:00,  2.29it/s, loss=0.128] 


Epoch 3 Loss: 6.9370
Epoch 4/4


Training Epoch 4: 100%|██████████| 38/38 [00:16<00:00,  2.31it/s, loss=0.204] 


Epoch 4 Loss: 4.9397
Test Metrics: Precision=0.9290, Recall=0.9290, F1=0.9290

Fine-tuning roberta-large (large) with Train Size 300, Split 2...


Map: 100%|██████████| 300/300 [00:00<00:00, 8792.41 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7375.46 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9236.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 38/38 [00:17<00:00,  2.23it/s, loss=0.313]


Epoch 1 Loss: 21.1125
Epoch 2/4


Training Epoch 2: 100%|██████████| 38/38 [00:16<00:00,  2.26it/s, loss=0.314]


Epoch 2 Loss: 10.4505
Epoch 3/4


Training Epoch 3: 100%|██████████| 38/38 [00:17<00:00,  2.18it/s, loss=0.443]


Epoch 3 Loss: 7.2861
Epoch 4/4


Training Epoch 4: 100%|██████████| 38/38 [00:17<00:00,  2.12it/s, loss=0.0862]


Epoch 4 Loss: 5.2517
Test Metrics: Precision=0.9305, Recall=0.9305, F1=0.9305

Fine-tuning roberta-large (large) with Train Size 300, Split 3...


Map: 100%|██████████| 300/300 [00:00<00:00, 8727.28 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7047.28 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 4024.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 38/38 [00:17<00:00,  2.16it/s, loss=0.425]


Epoch 1 Loss: 21.2014
Epoch 2/4


Training Epoch 2: 100%|██████████| 38/38 [00:17<00:00,  2.16it/s, loss=0.348]


Epoch 2 Loss: 10.1551
Epoch 3/4


Training Epoch 3: 100%|██████████| 38/38 [00:17<00:00,  2.22it/s, loss=0.149] 


Epoch 3 Loss: 6.9815
Epoch 4/4


Training Epoch 4: 100%|██████████| 38/38 [00:17<00:00,  2.16it/s, loss=0.075] 


Epoch 4 Loss: 5.1798
Test Metrics: Precision=0.9292, Recall=0.9292, F1=0.9292

Fine-tuning roberta-large (large) with Train Size 300, Split 4...


Map: 100%|██████████| 300/300 [00:00<00:00, 9049.07 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6145.35 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9549.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 38/38 [00:18<00:00,  2.09it/s, loss=0.468]


Epoch 1 Loss: 21.0947
Epoch 2/4


Training Epoch 2: 100%|██████████| 38/38 [00:18<00:00,  2.09it/s, loss=0.235]


Epoch 2 Loss: 9.4687
Epoch 3/4


Training Epoch 3: 100%|██████████| 38/38 [00:18<00:00,  2.06it/s, loss=0.236] 


Epoch 3 Loss: 6.3891
Epoch 4/4


Training Epoch 4: 100%|██████████| 38/38 [00:17<00:00,  2.14it/s, loss=0.108] 


Epoch 4 Loss: 4.6627
Test Metrics: Precision=0.9295, Recall=0.9295, F1=0.9295

Fine-tuning roberta-large (large) with Train Size 300, Split 5...


Map: 100%|██████████| 300/300 [00:00<00:00, 8756.86 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6669.09 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9339.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 38/38 [00:18<00:00,  2.06it/s, loss=0.498]


Epoch 1 Loss: 21.6316
Epoch 2/4


Training Epoch 2: 100%|██████████| 38/38 [00:19<00:00,  2.00it/s, loss=0.234]


Epoch 2 Loss: 10.3195
Epoch 3/4


Training Epoch 3: 100%|██████████| 38/38 [00:19<00:00,  1.98it/s, loss=0.209]


Epoch 3 Loss: 7.3163
Epoch 4/4


Training Epoch 4: 100%|██████████| 38/38 [00:18<00:00,  2.03it/s, loss=0.099] 


Epoch 4 Loss: 5.0997
Test Metrics: Precision=0.9340, Recall=0.9340, F1=0.9340

Fine-tuning roberta-large (large) with Train Size 305, Split 1...


Map: 100%|██████████| 305/305 [00:00<00:00, 8574.66 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 6951.38 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9831.55 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.22it/s, loss=0.218]


Epoch 1 Loss: 20.6968
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.23it/s, loss=0.445]


Epoch 2 Loss: 10.0382
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.21it/s, loss=0.0488]


Epoch 3 Loss: 7.1271
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:16<00:00,  2.30it/s, loss=0.0168]


Epoch 4 Loss: 5.3351
Test Metrics: Precision=0.9304, Recall=0.9304, F1=0.9304

Fine-tuning roberta-large (large) with Train Size 305, Split 2...


Map: 100%|██████████| 305/305 [00:00<00:00, 9044.88 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 6849.77 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9360.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:16<00:00,  2.31it/s, loss=1.13] 


Epoch 1 Loss: 25.2218
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:16<00:00,  2.32it/s, loss=0.13] 


Epoch 2 Loss: 11.8107
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.18it/s, loss=0.189] 


Epoch 3 Loss: 8.1151
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.21it/s, loss=0.0942]


Epoch 4 Loss: 6.1937
Test Metrics: Precision=0.9298, Recall=0.9298, F1=0.9298

Fine-tuning roberta-large (large) with Train Size 305, Split 3...


Map: 100%|██████████| 305/305 [00:00<00:00, 8477.83 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 8183.35 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9222.59 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.10it/s, loss=1.12] 


Epoch 1 Loss: 22.3485
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:18<00:00,  2.13it/s, loss=0.234]


Epoch 2 Loss: 13.1381
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.20it/s, loss=0.203]


Epoch 3 Loss: 9.7579
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.18it/s, loss=0.29] 


Epoch 4 Loss: 8.1715
Test Metrics: Precision=0.9224, Recall=0.9224, F1=0.9224

Fine-tuning roberta-large (large) with Train Size 305, Split 4...


Map: 100%|██████████| 305/305 [00:00<00:00, 8829.14 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7534.16 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9463.64 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.11it/s, loss=0.373]


Epoch 1 Loss: 21.6794
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:18<00:00,  2.09it/s, loss=0.339]


Epoch 2 Loss: 10.9214
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:18<00:00,  2.15it/s, loss=0.117]


Epoch 3 Loss: 7.8911
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:18<00:00,  2.10it/s, loss=0.0562]


Epoch 4 Loss: 5.8111
Test Metrics: Precision=0.9273, Recall=0.9273, F1=0.9273

Fine-tuning roberta-large (large) with Train Size 305, Split 5...


Map: 100%|██████████| 305/305 [00:00<00:00, 8407.52 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7912.80 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9273.04 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:19<00:00,  1.96it/s, loss=0.119]


Epoch 1 Loss: 21.3619
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:19<00:00,  1.99it/s, loss=0.367]


Epoch 2 Loss: 10.7104
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:19<00:00,  2.00it/s, loss=0.527] 


Epoch 3 Loss: 7.6345
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:19<00:00,  1.97it/s, loss=0.0141]


Epoch 4 Loss: 5.6196
Test Metrics: Precision=0.9309, Recall=0.9309, F1=0.9309

Fine-tuning roberta-large (large) with Train Size 310, Split 1...


Map: 100%|██████████| 310/310 [00:00<00:00, 8741.78 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 7075.14 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9205.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.20it/s, loss=0.241]


Epoch 1 Loss: 23.2455
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:16<00:00,  2.29it/s, loss=0.315]


Epoch 2 Loss: 11.2215
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.18it/s, loss=0.242] 


Epoch 3 Loss: 8.0329
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.17it/s, loss=0.108] 


Epoch 4 Loss: 5.9661
Test Metrics: Precision=0.9296, Recall=0.9296, F1=0.9296

Fine-tuning roberta-large (large) with Train Size 310, Split 2...


Map: 100%|██████████| 310/310 [00:00<00:00, 8903.64 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6660.52 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9207.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.20it/s, loss=0.24] 


Epoch 1 Loss: 23.8647
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.29it/s, loss=0.342]


Epoch 2 Loss: 11.1925
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:16<00:00,  2.32it/s, loss=0.122]


Epoch 3 Loss: 7.6662
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.24it/s, loss=0.0933]


Epoch 4 Loss: 5.7965
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 310, Split 3...


Map: 100%|██████████| 310/310 [00:00<00:00, 8787.80 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6082.82 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9390.42 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.11it/s, loss=0.226]


Epoch 1 Loss: 21.3316
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:18<00:00,  2.14it/s, loss=0.282]


Epoch 2 Loss: 10.1631
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.21it/s, loss=0.118] 


Epoch 3 Loss: 6.7044
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:18<00:00,  2.12it/s, loss=0.131] 


Epoch 4 Loss: 4.4574
Test Metrics: Precision=0.9318, Recall=0.9318, F1=0.9318

Fine-tuning roberta-large (large) with Train Size 310, Split 4...


Map: 100%|██████████| 310/310 [00:00<00:00, 8728.86 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6301.72 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9434.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.13it/s, loss=0.332]


Epoch 1 Loss: 22.1185
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:18<00:00,  2.11it/s, loss=0.209]


Epoch 2 Loss: 10.9535
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:18<00:00,  2.06it/s, loss=0.162]


Epoch 3 Loss: 7.7067
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:19<00:00,  2.04it/s, loss=0.109] 


Epoch 4 Loss: 5.9508
Test Metrics: Precision=0.9274, Recall=0.9274, F1=0.9274

Fine-tuning roberta-large (large) with Train Size 310, Split 5...


Map: 100%|██████████| 310/310 [00:00<00:00, 8760.51 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 7300.38 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9477.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 39/39 [00:20<00:00,  1.92it/s, loss=0.497]


Epoch 1 Loss: 21.2757
Epoch 2/4


Training Epoch 2: 100%|██████████| 39/39 [00:19<00:00,  1.96it/s, loss=0.263]


Epoch 2 Loss: 11.6114
Epoch 3/4


Training Epoch 3: 100%|██████████| 39/39 [00:19<00:00,  1.96it/s, loss=0.304]


Epoch 3 Loss: 7.8607
Epoch 4/4


Training Epoch 4: 100%|██████████| 39/39 [00:19<00:00,  1.96it/s, loss=0.0904]


Epoch 4 Loss: 5.9675
Test Metrics: Precision=0.9304, Recall=0.9304, F1=0.9304

Fine-tuning roberta-large (large) with Train Size 315, Split 1...


Map: 100%|██████████| 315/315 [00:00<00:00, 8921.64 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7281.78 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9548.90 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:17<00:00,  2.24it/s, loss=0.377]


Epoch 1 Loss: 23.7787
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s, loss=0.425]


Epoch 2 Loss: 11.3084
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s, loss=0.12]  


Epoch 3 Loss: 8.5307
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s, loss=0.167] 


Epoch 4 Loss: 6.2791
Test Metrics: Precision=0.9276, Recall=0.9276, F1=0.9276

Fine-tuning roberta-large (large) with Train Size 315, Split 2...


Map: 100%|██████████| 315/315 [00:00<00:00, 8907.87 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 6489.86 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9473.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:17<00:00,  2.25it/s, loss=0.312]


Epoch 1 Loss: 23.9842
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.411]


Epoch 2 Loss: 10.9918
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s, loss=0.247]


Epoch 3 Loss: 8.1539
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s, loss=0.128] 


Epoch 4 Loss: 5.9879
Test Metrics: Precision=0.9279, Recall=0.9279, F1=0.9279

Fine-tuning roberta-large (large) with Train Size 315, Split 3...


Map: 100%|██████████| 315/315 [00:00<00:00, 8866.56 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 5855.89 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9538.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.317]


Epoch 1 Loss: 21.9740
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s, loss=0.212]


Epoch 2 Loss: 10.6951
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.15it/s, loss=0.1]   


Epoch 3 Loss: 6.8741
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.13it/s, loss=0.0883]


Epoch 4 Loss: 5.0766
Test Metrics: Precision=0.9309, Recall=0.9309, F1=0.9309

Fine-tuning roberta-large (large) with Train Size 315, Split 4...


Map: 100%|██████████| 315/315 [00:00<00:00, 8808.10 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 6786.55 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9240.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.11it/s, loss=0.281]


Epoch 1 Loss: 21.3676
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.12it/s, loss=0.162]


Epoch 2 Loss: 10.8361
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.13it/s, loss=0.16]  


Epoch 3 Loss: 7.4251
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:19<00:00,  2.10it/s, loss=0.112] 


Epoch 4 Loss: 5.8150
Test Metrics: Precision=0.9287, Recall=0.9287, F1=0.9287

Fine-tuning roberta-large (large) with Train Size 315, Split 5...


Map: 100%|██████████| 315/315 [00:00<00:00, 8803.81 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7289.21 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9450.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:19<00:00,  2.10it/s, loss=0.499]


Epoch 1 Loss: 24.2784
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:20<00:00,  1.96it/s, loss=0.235]


Epoch 2 Loss: 10.8815
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:19<00:00,  2.00it/s, loss=0.299]


Epoch 3 Loss: 8.0004
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:20<00:00,  1.96it/s, loss=0.126] 


Epoch 4 Loss: 5.5435
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 320, Split 1...


Map: 100%|██████████| 320/320 [00:00<00:00, 8855.11 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6212.06 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 4107.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:17<00:00,  2.26it/s, loss=0.411]


Epoch 1 Loss: 25.5455
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.14it/s, loss=0.522]


Epoch 2 Loss: 12.5412
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s, loss=0.205]


Epoch 3 Loss: 9.4509
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:17<00:00,  2.23it/s, loss=0.152] 


Epoch 4 Loss: 7.0495
Test Metrics: Precision=0.9271, Recall=0.9271, F1=0.9271

Fine-tuning roberta-large (large) with Train Size 320, Split 2...


Map: 100%|██████████| 320/320 [00:00<00:00, 9084.85 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 8073.73 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9423.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.17it/s, loss=0.275]


Epoch 1 Loss: 23.9014
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.17it/s, loss=0.363]


Epoch 2 Loss: 11.2555
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:17<00:00,  2.31it/s, loss=0.128]


Epoch 3 Loss: 8.4905
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.19it/s, loss=0.186] 


Epoch 4 Loss: 6.0957
Test Metrics: Precision=0.9288, Recall=0.9288, F1=0.9288

Fine-tuning roberta-large (large) with Train Size 320, Split 3...


Map: 100%|██████████| 320/320 [00:00<00:00, 9089.40 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7467.53 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9613.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:19<00:00,  2.08it/s, loss=0.236]


Epoch 1 Loss: 21.5892
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.17it/s, loss=0.241]


Epoch 2 Loss: 10.1623
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.14it/s, loss=0.103]


Epoch 3 Loss: 6.6820
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.12it/s, loss=0.0813]


Epoch 4 Loss: 4.6331
Test Metrics: Precision=0.9302, Recall=0.9302, F1=0.9302

Fine-tuning roberta-large (large) with Train Size 320, Split 4...


Map: 100%|██████████| 320/320 [00:00<00:00, 8834.65 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7047.95 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9229.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:19<00:00,  2.07it/s, loss=0.329]


Epoch 1 Loss: 22.0009
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s, loss=0.137]


Epoch 2 Loss: 10.1806
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:19<00:00,  2.10it/s, loss=0.125] 


Epoch 3 Loss: 7.0829
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:19<00:00,  2.05it/s, loss=0.145] 


Epoch 4 Loss: 5.4025
Test Metrics: Precision=0.9296, Recall=0.9296, F1=0.9296

Fine-tuning roberta-large (large) with Train Size 320, Split 5...


Map: 100%|██████████| 320/320 [00:00<00:00, 8683.30 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7256.39 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9629.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 40/40 [00:20<00:00,  1.99it/s, loss=0.475]


Epoch 1 Loss: 25.9367
Epoch 2/4


Training Epoch 2: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s, loss=0.275]


Epoch 2 Loss: 11.3418
Epoch 3/4


Training Epoch 3: 100%|██████████| 40/40 [00:20<00:00,  1.96it/s, loss=0.205]


Epoch 3 Loss: 9.2550
Epoch 4/4


Training Epoch 4: 100%|██████████| 40/40 [00:20<00:00,  2.00it/s, loss=0.0697]


Epoch 4 Loss: 7.0433
Test Metrics: Precision=0.9288, Recall=0.9288, F1=0.9288

Fine-tuning roberta-large (large) with Train Size 325, Split 1...


Map: 100%|██████████| 325/325 [00:00<00:00, 8657.44 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6950.23 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9391.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 41/41 [00:18<00:00,  2.27it/s, loss=0.284]


Epoch 1 Loss: 23.8862
Epoch 2/4


Training Epoch 2: 100%|██████████| 41/41 [00:18<00:00,  2.25it/s, loss=0.371]


Epoch 2 Loss: 10.5236
Epoch 3/4


Training Epoch 3: 100%|██████████| 41/41 [00:18<00:00,  2.24it/s, loss=0.202] 


Epoch 3 Loss: 7.0890
Epoch 4/4


Training Epoch 4: 100%|██████████| 41/41 [00:17<00:00,  2.29it/s, loss=0.0418]


Epoch 4 Loss: 4.9055
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 325, Split 2...


Map: 100%|██████████| 325/325 [00:00<00:00, 9101.19 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 7670.20 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9192.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 41/41 [00:18<00:00,  2.21it/s, loss=0.546]


Epoch 1 Loss: 22.8754
Epoch 2/4


Training Epoch 2: 100%|██████████| 41/41 [00:18<00:00,  2.18it/s, loss=0.183]


Epoch 2 Loss: 10.6548
Epoch 3/4


Training Epoch 3: 100%|██████████| 41/41 [00:18<00:00,  2.26it/s, loss=0.0901]


Epoch 3 Loss: 6.9528
Epoch 4/4


Training Epoch 4: 100%|██████████| 41/41 [00:18<00:00,  2.16it/s, loss=0.152] 


Epoch 4 Loss: 5.0024
Test Metrics: Precision=0.9317, Recall=0.9317, F1=0.9317

Fine-tuning roberta-large (large) with Train Size 325, Split 3...


Map: 100%|██████████| 325/325 [00:00<00:00, 9128.55 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6591.79 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9570.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 41/41 [00:19<00:00,  2.11it/s, loss=0.219]


Epoch 1 Loss: 22.5951
Epoch 2/4


Training Epoch 2: 100%|██████████| 41/41 [00:19<00:00,  2.14it/s, loss=0.192]


Epoch 2 Loss: 10.5573
Epoch 3/4


Training Epoch 3: 100%|██████████| 41/41 [00:19<00:00,  2.12it/s, loss=0.163]


Epoch 3 Loss: 7.5234
Epoch 4/4


Training Epoch 4: 100%|██████████| 41/41 [00:19<00:00,  2.14it/s, loss=0.146] 


Epoch 4 Loss: 5.5951
Test Metrics: Precision=0.9299, Recall=0.9299, F1=0.9299

Fine-tuning roberta-large (large) with Train Size 325, Split 4...


Map: 100%|██████████| 325/325 [00:00<00:00, 8521.76 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 8343.94 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9162.60 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 41/41 [00:19<00:00,  2.05it/s, loss=0.171]


Epoch 1 Loss: 21.9734
Epoch 2/4


Training Epoch 2: 100%|██████████| 41/41 [00:19<00:00,  2.12it/s, loss=0.242]


Epoch 2 Loss: 11.5907
Epoch 3/4


Training Epoch 3: 100%|██████████| 41/41 [00:19<00:00,  2.12it/s, loss=0.157] 


Epoch 3 Loss: 7.5336
Epoch 4/4


Training Epoch 4: 100%|██████████| 41/41 [00:18<00:00,  2.20it/s, loss=0.086] 


Epoch 4 Loss: 5.5491
Test Metrics: Precision=0.9256, Recall=0.9256, F1=0.9256

Fine-tuning roberta-large (large) with Train Size 325, Split 5...


Map: 100%|██████████| 325/325 [00:00<00:00, 8723.87 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6017.52 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9727.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 41/41 [00:20<00:00,  1.99it/s, loss=0.545]


Epoch 1 Loss: 26.6448
Epoch 2/4


Training Epoch 2: 100%|██████████| 41/41 [00:19<00:00,  2.06it/s, loss=0.435]


Epoch 2 Loss: 12.0209
Epoch 3/4


Training Epoch 3: 100%|██████████| 41/41 [00:21<00:00,  1.94it/s, loss=0.159] 


Epoch 3 Loss: 7.4728
Epoch 4/4


Training Epoch 4: 100%|██████████| 41/41 [00:19<00:00,  2.07it/s, loss=0.102] 


Epoch 4 Loss: 5.3252
Test Metrics: Precision=0.9283, Recall=0.9283, F1=0.9283

Fine-tuning roberta-large (large) with Train Size 330, Split 1...


Map: 100%|██████████| 330/330 [00:00<00:00, 8543.53 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7946.95 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9392.38 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.16it/s, loss=0.506]


Epoch 1 Loss: 24.1919
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.21it/s, loss=0.0842]


Epoch 2 Loss: 10.5469
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.14it/s, loss=0.0656]


Epoch 3 Loss: 7.3335
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:18<00:00,  2.21it/s, loss=0.0547]


Epoch 4 Loss: 5.4023
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 330, Split 2...


Map: 100%|██████████| 330/330 [00:00<00:00, 9079.12 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7378.83 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 3830.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.17] 


Epoch 1 Loss: 22.1574
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:18<00:00,  2.26it/s, loss=0.181]


Epoch 2 Loss: 11.1026
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.20it/s, loss=0.145] 


Epoch 3 Loss: 7.8568
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.20it/s, loss=0.0415]


Epoch 4 Loss: 5.9481
Test Metrics: Precision=0.9310, Recall=0.9310, F1=0.9310

Fine-tuning roberta-large (large) with Train Size 330, Split 3...


Map: 100%|██████████| 330/330 [00:00<00:00, 9027.13 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 6896.81 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9368.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.24] 


Epoch 1 Loss: 24.5909
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.14it/s, loss=0.241]


Epoch 2 Loss: 11.7004
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.22it/s, loss=0.34] 


Epoch 3 Loss: 8.1813
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.064] 


Epoch 4 Loss: 5.9995
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning roberta-large (large) with Train Size 330, Split 4...


Map: 100%|██████████| 330/330 [00:00<00:00, 8629.61 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7828.07 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9439.47 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:20<00:00,  2.07it/s, loss=0.0403]


Epoch 1 Loss: 21.6114
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:20<00:00,  2.09it/s, loss=1.37] 


Epoch 2 Loss: 11.9588
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.14it/s, loss=0.152]


Epoch 3 Loss: 8.4670
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.16it/s, loss=0.0853]


Epoch 4 Loss: 6.2113
Test Metrics: Precision=0.9272, Recall=0.9272, F1=0.9272

Fine-tuning roberta-large (large) with Train Size 330, Split 5...


Map: 100%|██████████| 330/330 [00:00<00:00, 8523.64 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7090.78 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9554.45 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:21<00:00,  1.95it/s, loss=0.308]


Epoch 1 Loss: 24.1571
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:21<00:00,  1.94it/s, loss=0.254]


Epoch 2 Loss: 12.7748
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:21<00:00,  1.97it/s, loss=0.245]


Epoch 3 Loss: 8.9912
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:21<00:00,  1.94it/s, loss=0.0254]


Epoch 4 Loss: 6.5463
Test Metrics: Precision=0.9278, Recall=0.9278, F1=0.9278

Fine-tuning roberta-large (large) with Train Size 335, Split 1...


Map: 100%|██████████| 335/335 [00:00<00:00, 8582.02 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7129.37 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9367.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.15it/s, loss=0.283]


Epoch 1 Loss: 21.2619
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.196]


Epoch 2 Loss: 10.2163
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.17it/s, loss=0.101] 


Epoch 3 Loss: 7.5784
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.21it/s, loss=0.0509]


Epoch 4 Loss: 5.6596
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 335, Split 2...


Map: 100%|██████████| 335/335 [00:00<00:00, 9311.29 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7049.25 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9375.37 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.20it/s, loss=0.375]


Epoch 1 Loss: 21.9739
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.17it/s, loss=0.334]


Epoch 2 Loss: 10.6396
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.19it/s, loss=0.203] 


Epoch 3 Loss: 6.6249
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.18it/s, loss=0.129] 


Epoch 4 Loss: 4.5217
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 335, Split 3...


Map: 100%|██████████| 335/335 [00:00<00:00, 8834.50 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7670.13 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9258.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.14it/s, loss=0.357]


Epoch 1 Loss: 26.2472
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.18it/s, loss=0.206]


Epoch 2 Loss: 12.1594
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.20it/s, loss=0.131] 


Epoch 3 Loss: 8.4279
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.17it/s, loss=0.215] 


Epoch 4 Loss: 5.9828
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning roberta-large (large) with Train Size 335, Split 4...


Map: 100%|██████████| 335/335 [00:00<00:00, 9076.41 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7615.26 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9284.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:20<00:00,  2.08it/s, loss=0.271]


Epoch 1 Loss: 20.9100
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:20<00:00,  2.07it/s, loss=0.199]


Epoch 2 Loss: 10.3890
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.13it/s, loss=0.18]  


Epoch 3 Loss: 6.9010
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:20<00:00,  2.06it/s, loss=0.0716]


Epoch 4 Loss: 4.9969
Test Metrics: Precision=0.9288, Recall=0.9288, F1=0.9288

Fine-tuning roberta-large (large) with Train Size 335, Split 5...


Map: 100%|██████████| 335/335 [00:00<00:00, 8586.22 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7576.66 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9559.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 42/42 [00:22<00:00,  1.91it/s, loss=0.47] 


Epoch 1 Loss: 23.0465
Epoch 2/4


Training Epoch 2: 100%|██████████| 42/42 [00:21<00:00,  1.97it/s, loss=0.165]


Epoch 2 Loss: 11.8186
Epoch 3/4


Training Epoch 3: 100%|██████████| 42/42 [00:21<00:00,  1.95it/s, loss=0.124]


Epoch 3 Loss: 8.5063
Epoch 4/4


Training Epoch 4: 100%|██████████| 42/42 [00:20<00:00,  2.01it/s, loss=0.136] 


Epoch 4 Loss: 6.1327
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 340, Split 1...


Map: 100%|██████████| 340/340 [00:00<00:00, 8664.86 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7244.05 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9228.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.25it/s, loss=0.27] 


Epoch 1 Loss: 23.5375
Epoch 2/4


Training Epoch 2: 100%|██████████| 43/43 [00:19<00:00,  2.21it/s, loss=0.273]


Epoch 2 Loss: 11.5025
Epoch 3/4


Training Epoch 3: 100%|██████████| 43/43 [00:19<00:00,  2.22it/s, loss=0.159] 


Epoch 3 Loss: 7.9596
Epoch 4/4


Training Epoch 4: 100%|██████████| 43/43 [00:20<00:00,  2.15it/s, loss=0.159] 


Epoch 4 Loss: 5.8652
Test Metrics: Precision=0.9292, Recall=0.9292, F1=0.9292

Fine-tuning roberta-large (large) with Train Size 340, Split 2...


Map: 100%|██████████| 340/340 [00:00<00:00, 9172.06 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6980.24 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9226.72 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.19it/s, loss=0.344]


Epoch 1 Loss: 23.1301
Epoch 2/4


Training Epoch 2: 100%|██████████| 43/43 [00:19<00:00,  2.21it/s, loss=0.328]


Epoch 2 Loss: 10.8835
Epoch 3/4


Training Epoch 3: 100%|██████████| 43/43 [00:19<00:00,  2.19it/s, loss=0.106] 


Epoch 3 Loss: 7.5255
Epoch 4/4


Training Epoch 4: 100%|██████████| 43/43 [00:19<00:00,  2.17it/s, loss=0.238] 


Epoch 4 Loss: 5.9497
Test Metrics: Precision=0.9313, Recall=0.9313, F1=0.9313

Fine-tuning roberta-large (large) with Train Size 340, Split 3...


Map: 100%|██████████| 340/340 [00:00<00:00, 8728.56 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7166.15 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9332.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.17it/s, loss=0.408]


Epoch 1 Loss: 23.3584
Epoch 2/4


Training Epoch 2: 100%|██████████| 43/43 [00:19<00:00,  2.18it/s, loss=0.263]


Epoch 2 Loss: 10.8212
Epoch 3/4


Training Epoch 3: 100%|██████████| 43/43 [00:19<00:00,  2.18it/s, loss=0.128] 


Epoch 3 Loss: 7.1586
Epoch 4/4


Training Epoch 4: 100%|██████████| 43/43 [00:19<00:00,  2.17it/s, loss=0.0845]


Epoch 4 Loss: 4.9536
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 340, Split 4...


Map: 100%|██████████| 340/340 [00:00<00:00, 8748.43 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6605.36 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9304.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.15it/s, loss=0.515]


Epoch 1 Loss: 26.5867
Epoch 2/4


Training Epoch 2: 100%|██████████| 43/43 [00:20<00:00,  2.12it/s, loss=0.29] 


Epoch 2 Loss: 14.4086
Epoch 3/4


Training Epoch 3: 100%|██████████| 43/43 [00:21<00:00,  2.03it/s, loss=0.173]


Epoch 3 Loss: 9.5080
Epoch 4/4


Training Epoch 4: 100%|██████████| 43/43 [00:20<00:00,  2.12it/s, loss=0.166] 


Epoch 4 Loss: 7.2052
Test Metrics: Precision=0.9236, Recall=0.9236, F1=0.9236

Fine-tuning roberta-large (large) with Train Size 340, Split 5...


Map: 100%|██████████| 340/340 [00:00<00:00, 8336.63 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 8341.75 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9584.84 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 43/43 [00:21<00:00,  2.03it/s, loss=0.296]


Epoch 1 Loss: 23.8933
Epoch 2/4


Training Epoch 2: 100%|██████████| 43/43 [00:22<00:00,  1.95it/s, loss=0.196]


Epoch 2 Loss: 11.5281
Epoch 3/4


Training Epoch 3: 100%|██████████| 43/43 [00:21<00:00,  2.01it/s, loss=0.128]


Epoch 3 Loss: 8.5481
Epoch 4/4


Training Epoch 4: 100%|██████████| 43/43 [00:22<00:00,  1.94it/s, loss=0.0977]


Epoch 4 Loss: 6.2695
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning roberta-large (large) with Train Size 345, Split 1...


Map: 100%|██████████| 345/345 [00:00<00:00, 8701.88 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6880.65 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9528.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.28it/s, loss=0.129]


Epoch 1 Loss: 21.8312
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.25it/s, loss=0.652] 


Epoch 2 Loss: 10.3217
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.0678]


Epoch 3 Loss: 6.6523
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.17it/s, loss=0.0843]


Epoch 4 Loss: 4.5174
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning roberta-large (large) with Train Size 345, Split 2...


Map: 100%|██████████| 345/345 [00:00<00:00, 9004.18 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6164.55 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 8880.35 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.25it/s, loss=0.25] 


Epoch 1 Loss: 25.3270
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.21it/s, loss=0.385]


Epoch 2 Loss: 12.3111
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.16it/s, loss=0.00394]


Epoch 3 Loss: 8.1548
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.17it/s, loss=0.0972]


Epoch 4 Loss: 5.8885
Test Metrics: Precision=0.9295, Recall=0.9295, F1=0.9295

Fine-tuning roberta-large (large) with Train Size 345, Split 3...


Map: 100%|██████████| 345/345 [00:00<00:00, 8860.45 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6914.18 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9217.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.20it/s, loss=0.152]


Epoch 1 Loss: 25.3876
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.20it/s, loss=0.0598]


Epoch 2 Loss: 11.3099
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.19it/s, loss=0.0445]


Epoch 3 Loss: 7.1267
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.20it/s, loss=0.0294]


Epoch 4 Loss: 4.7745
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 345, Split 4...


Map: 100%|██████████| 345/345 [00:00<00:00, 8893.50 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 7567.38 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9079.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:20<00:00,  2.13it/s, loss=0.283]


Epoch 1 Loss: 23.2465
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:20<00:00,  2.15it/s, loss=0.0873]


Epoch 2 Loss: 11.5608
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.15it/s, loss=0.533] 


Epoch 3 Loss: 8.5909
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:21<00:00,  2.09it/s, loss=0.0973]


Epoch 4 Loss: 5.8665
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 345, Split 5...


Map: 100%|██████████| 345/345 [00:00<00:00, 8672.15 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 8092.13 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9239.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:21<00:00,  2.05it/s, loss=0.227]


Epoch 1 Loss: 23.0863
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:21<00:00,  2.04it/s, loss=0.171]


Epoch 2 Loss: 10.6638
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:21<00:00,  2.03it/s, loss=0.339]


Epoch 3 Loss: 7.7433
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:22<00:00,  1.97it/s, loss=0.0337]


Epoch 4 Loss: 5.2890
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 350, Split 1...


Map: 100%|██████████| 350/350 [00:00<00:00, 8711.07 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 6832.54 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9240.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.23it/s, loss=0.331]


Epoch 1 Loss: 23.6860
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:20<00:00,  2.15it/s, loss=0.111]


Epoch 2 Loss: 10.7540
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.26it/s, loss=0.21]  


Epoch 3 Loss: 7.1351
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.18it/s, loss=0.112] 


Epoch 4 Loss: 5.0522
Test Metrics: Precision=0.9316, Recall=0.9316, F1=0.9316

Fine-tuning roberta-large (large) with Train Size 350, Split 2...


Map: 100%|██████████| 350/350 [00:00<00:00, 9034.89 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7562.37 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9361.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:20<00:00,  2.13it/s, loss=0.383]


Epoch 1 Loss: 27.9517
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:20<00:00,  2.16it/s, loss=0.194]


Epoch 2 Loss: 12.8888
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.20it/s, loss=0.252]


Epoch 3 Loss: 8.9324
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:21<00:00,  2.08it/s, loss=0.125] 


Epoch 4 Loss: 6.6188
Test Metrics: Precision=0.9282, Recall=0.9282, F1=0.9282

Fine-tuning roberta-large (large) with Train Size 350, Split 3...


Map: 100%|██████████| 350/350 [00:00<00:00, 8988.75 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7744.49 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9373.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:20<00:00,  2.20it/s, loss=0.302]


Epoch 1 Loss: 22.7978
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.21it/s, loss=0.423]


Epoch 2 Loss: 10.8629
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.19it/s, loss=0.0985]


Epoch 3 Loss: 6.8955
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.11it/s, loss=0.0744]


Epoch 4 Loss: 4.4186
Test Metrics: Precision=0.9340, Recall=0.9340, F1=0.9340

Fine-tuning roberta-large (large) with Train Size 350, Split 4...


Map: 100%|██████████| 350/350 [00:00<00:00, 9308.44 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7832.08 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9527.57 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:20<00:00,  2.10it/s, loss=1.02] 


Epoch 1 Loss: 29.9516
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:21<00:00,  2.09it/s, loss=0.469]


Epoch 2 Loss: 13.6849
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.13it/s, loss=0.223]


Epoch 3 Loss: 10.3453
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.11it/s, loss=0.243]


Epoch 4 Loss: 9.6214
Test Metrics: Precision=0.9102, Recall=0.9102, F1=0.9102

Fine-tuning roberta-large (large) with Train Size 350, Split 5...


Map: 100%|██████████| 350/350 [00:00<00:00, 8525.06 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7459.19 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9295.80 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 44/44 [00:22<00:00,  1.99it/s, loss=0.375]


Epoch 1 Loss: 26.8806
Epoch 2/4


Training Epoch 2: 100%|██████████| 44/44 [00:21<00:00,  2.01it/s, loss=0.303]


Epoch 2 Loss: 12.2143
Epoch 3/4


Training Epoch 3: 100%|██████████| 44/44 [00:21<00:00,  2.01it/s, loss=0.161] 


Epoch 3 Loss: 8.2989
Epoch 4/4


Training Epoch 4: 100%|██████████| 44/44 [00:22<00:00,  1.99it/s, loss=0.195] 


Epoch 4 Loss: 5.9658
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 355, Split 1...


Map: 100%|██████████| 355/355 [00:00<00:00, 8840.91 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6066.57 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9487.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.23it/s, loss=0.202]


Epoch 1 Loss: 22.5288
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:19<00:00,  2.26it/s, loss=0.176]


Epoch 2 Loss: 10.5825
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.22it/s, loss=0.3]   


Epoch 3 Loss: 7.4718
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.23it/s, loss=0.102] 


Epoch 4 Loss: 5.3825
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 355, Split 2...


Map: 100%|██████████| 355/355 [00:00<00:00, 8676.12 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 7378.30 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9200.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.323]


Epoch 1 Loss: 24.6179
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.15it/s, loss=0.253]


Epoch 2 Loss: 11.9594
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.185] 


Epoch 3 Loss: 7.8356
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.21it/s, loss=0.185] 


Epoch 4 Loss: 5.8361
Test Metrics: Precision=0.9329, Recall=0.9329, F1=0.9329

Fine-tuning roberta-large (large) with Train Size 355, Split 3...


Map: 100%|██████████| 355/355 [00:00<00:00, 8833.89 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 8262.92 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9409.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.295]


Epoch 1 Loss: 23.1152
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.08it/s, loss=0.211]


Epoch 2 Loss: 11.6552
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.246] 


Epoch 3 Loss: 8.1422
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.0867]


Epoch 4 Loss: 5.8907
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 355, Split 4...


Map: 100%|██████████| 355/355 [00:00<00:00, 8979.48 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 7993.01 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9270.18 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.13it/s, loss=0.443]


Epoch 1 Loss: 23.2335
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.13it/s, loss=0.17] 


Epoch 2 Loss: 10.7555
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.14it/s, loss=0.0415]


Epoch 3 Loss: 7.1825
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.0667]


Epoch 4 Loss: 5.3861
Test Metrics: Precision=0.9283, Recall=0.9283, F1=0.9283

Fine-tuning roberta-large (large) with Train Size 355, Split 5...


Map: 100%|██████████| 355/355 [00:00<00:00, 8851.43 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6899.96 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9334.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  1.99it/s, loss=0.304]


Epoch 1 Loss: 22.6447
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  2.04it/s, loss=0.206]


Epoch 2 Loss: 11.4545
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.05it/s, loss=0.157] 


Epoch 3 Loss: 7.6926
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:22<00:00,  2.01it/s, loss=0.065] 


Epoch 4 Loss: 6.2998
Test Metrics: Precision=0.9322, Recall=0.9322, F1=0.9322

Fine-tuning roberta-large (large) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 8973.11 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6629.56 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9303.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.19it/s, loss=0.351]


Epoch 1 Loss: 28.0108
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.20it/s, loss=0.28] 


Epoch 2 Loss: 12.7652
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.20it/s, loss=0.148] 


Epoch 3 Loss: 8.5706
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.22it/s, loss=0.178] 


Epoch 4 Loss: 6.6302
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning roberta-large (large) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 8925.21 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6949.65 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9448.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.358]


Epoch 1 Loss: 23.7693
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.09it/s, loss=0.198]


Epoch 2 Loss: 11.8062
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.198] 


Epoch 3 Loss: 7.6337
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.09it/s, loss=0.0495]


Epoch 4 Loss: 5.0957
Test Metrics: Precision=0.9322, Recall=0.9322, F1=0.9322

Fine-tuning roberta-large (large) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 8677.07 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7571.33 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9466.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.21it/s, loss=0.458]


Epoch 1 Loss: 24.8262
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.217]


Epoch 2 Loss: 11.4939
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.218] 


Epoch 3 Loss: 7.2884
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.126] 


Epoch 4 Loss: 5.1341
Test Metrics: Precision=0.9334, Recall=0.9334, F1=0.9334

Fine-tuning roberta-large (large) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 2100.21 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6874.81 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 8982.60 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.13it/s, loss=0.666]


Epoch 1 Loss: 25.4503
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.173] 


Epoch 2 Loss: 11.0254
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.06it/s, loss=0.12]  


Epoch 3 Loss: 7.4875
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:22<00:00,  2.02it/s, loss=0.0935]


Epoch 4 Loss: 5.4341
Test Metrics: Precision=0.9294, Recall=0.9294, F1=0.9294

Fine-tuning roberta-large (large) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 8754.04 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7320.97 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9545.38 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  1.96it/s, loss=0.414]


Epoch 1 Loss: 25.6402
Epoch 2/4


Training Epoch 2: 100%|██████████| 45/45 [00:23<00:00,  1.95it/s, loss=0.208]


Epoch 2 Loss: 12.1353
Epoch 3/4


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  2.03it/s, loss=0.217] 


Epoch 3 Loss: 8.1276
Epoch 4/4


Training Epoch 4: 100%|██████████| 45/45 [00:22<00:00,  2.00it/s, loss=0.167] 


Epoch 4 Loss: 5.6951
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning roberta-large (large) with Train Size 365, Split 1...


Map: 100%|██████████| 365/365 [00:00<00:00, 9007.96 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6861.12 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9166.42 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 46/46 [00:20<00:00,  2.25it/s, loss=0.307]


Epoch 1 Loss: 24.4791
Epoch 2/4


Training Epoch 2: 100%|██████████| 46/46 [00:20<00:00,  2.20it/s, loss=0.14] 


Epoch 2 Loss: 12.4857
Epoch 3/4


Training Epoch 3: 100%|██████████| 46/46 [00:21<00:00,  2.17it/s, loss=0.158] 


Epoch 3 Loss: 8.4489
Epoch 4/4


Training Epoch 4: 100%|██████████| 46/46 [00:20<00:00,  2.27it/s, loss=0.134] 


Epoch 4 Loss: 6.0302
Test Metrics: Precision=0.9318, Recall=0.9318, F1=0.9318

Fine-tuning roberta-large (large) with Train Size 365, Split 2...


Map: 100%|██████████| 365/365 [00:00<00:00, 8818.92 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6274.65 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9483.47 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 46/46 [00:21<00:00,  2.17it/s, loss=0.338]


Epoch 1 Loss: 27.7957
Epoch 2/4


Training Epoch 2: 100%|██████████| 46/46 [00:21<00:00,  2.17it/s, loss=0.173]


Epoch 2 Loss: 12.6737
Epoch 3/4


Training Epoch 3: 100%|██████████| 46/46 [00:21<00:00,  2.14it/s, loss=0.216] 


Epoch 3 Loss: 8.5634
Epoch 4/4


Training Epoch 4: 100%|██████████| 46/46 [00:22<00:00,  2.07it/s, loss=0.083] 


Epoch 4 Loss: 6.0791
Test Metrics: Precision=0.9308, Recall=0.9308, F1=0.9308

Fine-tuning roberta-large (large) with Train Size 365, Split 3...


Map: 100%|██████████| 365/365 [00:00<00:00, 8806.39 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 7000.58 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9418.24 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 46/46 [00:22<00:00,  2.07it/s, loss=0.263]


Epoch 1 Loss: 26.2031
Epoch 2/4


Training Epoch 2: 100%|██████████| 46/46 [00:21<00:00,  2.13it/s, loss=0.0976]


Epoch 2 Loss: 11.4970
Epoch 3/4


Training Epoch 3: 100%|██████████| 46/46 [00:21<00:00,  2.13it/s, loss=0.239] 


Epoch 3 Loss: 8.0328
Epoch 4/4


Training Epoch 4: 100%|██████████| 46/46 [00:21<00:00,  2.15it/s, loss=0.0992]


Epoch 4 Loss: 5.7882
Test Metrics: Precision=0.9292, Recall=0.9292, F1=0.9292

Fine-tuning roberta-large (large) with Train Size 365, Split 4...


Map: 100%|██████████| 365/365 [00:00<00:00, 8962.92 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6323.90 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9390.23 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 46/46 [00:21<00:00,  2.18it/s, loss=0.354]


Epoch 1 Loss: 23.8532
Epoch 2/4


Training Epoch 2: 100%|██████████| 46/46 [00:21<00:00,  2.12it/s, loss=0.347]


Epoch 2 Loss: 13.1920
Epoch 3/4


Training Epoch 3: 100%|██████████| 46/46 [00:21<00:00,  2.11it/s, loss=0.186]


Epoch 3 Loss: 8.8619
Epoch 4/4


Training Epoch 4: 100%|██████████| 46/46 [00:20<00:00,  2.20it/s, loss=0.112] 


Epoch 4 Loss: 6.5968
Test Metrics: Precision=0.9227, Recall=0.9227, F1=0.9227

Fine-tuning roberta-large (large) with Train Size 365, Split 5...


Map: 100%|██████████| 365/365 [00:00<00:00, 8872.59 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 7388.97 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9413.47 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, loss=0.433]


Epoch 1 Loss: 26.0681
Epoch 2/4


Training Epoch 2: 100%|██████████| 46/46 [00:22<00:00,  2.05it/s, loss=0.199]


Epoch 2 Loss: 12.0539
Epoch 3/4


Training Epoch 3: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, loss=0.16]  


Epoch 3 Loss: 8.0883
Epoch 4/4


Training Epoch 4: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, loss=0.127] 


Epoch 4 Loss: 5.7256
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 370, Split 1...


Map: 100%|██████████| 370/370 [00:00<00:00, 8807.16 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6937.07 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9191.27 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:20<00:00,  2.25it/s, loss=0.239]


Epoch 1 Loss: 26.7189
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:20<00:00,  2.26it/s, loss=0.146]


Epoch 2 Loss: 12.6789
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.21it/s, loss=0.157] 


Epoch 3 Loss: 8.5841
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:20<00:00,  2.32it/s, loss=0.0867]


Epoch 4 Loss: 5.9317
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning roberta-large (large) with Train Size 370, Split 2...


Map: 100%|██████████| 370/370 [00:00<00:00, 8950.82 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6871.04 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9349.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.13it/s, loss=0.226]


Epoch 1 Loss: 23.3043
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.11it/s, loss=0.276]


Epoch 2 Loss: 11.1467
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.0381]


Epoch 3 Loss: 7.2189
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.119] 


Epoch 4 Loss: 5.0317
Test Metrics: Precision=0.9337, Recall=0.9337, F1=0.9337

Fine-tuning roberta-large (large) with Train Size 370, Split 3...


Map: 100%|██████████| 370/370 [00:00<00:00, 8722.47 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 8276.76 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9456.72 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.13it/s, loss=0.46] 


Epoch 1 Loss: 26.0216
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.13it/s, loss=0.121]


Epoch 2 Loss: 11.8854
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.17it/s, loss=0.259] 


Epoch 3 Loss: 7.8846
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.172] 


Epoch 4 Loss: 5.6621
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 370, Split 4...


Map: 100%|██████████| 370/370 [00:00<00:00, 9208.02 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 7327.16 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9227.38 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.21it/s, loss=0.154]


Epoch 1 Loss: 28.5837
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.22it/s, loss=0.215]


Epoch 2 Loss: 11.9989
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.164] 


Epoch 3 Loss: 8.6882
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:20<00:00,  2.25it/s, loss=0.0338]


Epoch 4 Loss: 6.0976
Test Metrics: Precision=0.9294, Recall=0.9294, F1=0.9294

Fine-tuning roberta-large (large) with Train Size 370, Split 5...


Map: 100%|██████████| 370/370 [00:00<00:00, 9130.39 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6997.76 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9303.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:23<00:00,  2.01it/s, loss=0.355]


Epoch 1 Loss: 27.0692
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:23<00:00,  2.02it/s, loss=0.126]


Epoch 2 Loss: 11.7661
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.06it/s, loss=0.0525]


Epoch 3 Loss: 7.9402
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.05it/s, loss=0.166] 


Epoch 4 Loss: 5.9451
Test Metrics: Precision=0.9334, Recall=0.9334, F1=0.9334

Fine-tuning roberta-large (large) with Train Size 375, Split 1...


Map: 100%|██████████| 375/375 [00:00<00:00, 8794.52 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7746.76 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9347.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.20it/s, loss=1.11] 


Epoch 1 Loss: 24.9742
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.18it/s, loss=0.244]


Epoch 2 Loss: 11.1792
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.0773]


Epoch 3 Loss: 6.9960
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:21<00:00,  2.18it/s, loss=0.0557]


Epoch 4 Loss: 4.8589
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 375, Split 2...


Map: 100%|██████████| 375/375 [00:00<00:00, 8958.10 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6669.77 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9297.35 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.11it/s, loss=0.307]


Epoch 1 Loss: 24.5118
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.07it/s, loss=0.157]


Epoch 2 Loss: 10.5133
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.11it/s, loss=0.155] 


Epoch 3 Loss: 6.7916
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s, loss=0.144] 


Epoch 4 Loss: 4.2919
Test Metrics: Precision=0.9348, Recall=0.9348, F1=0.9348

Fine-tuning roberta-large (large) with Train Size 375, Split 3...


Map: 100%|██████████| 375/375 [00:00<00:00, 8636.23 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 9010.71 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9181.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.277]


Epoch 1 Loss: 23.7543
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.05it/s, loss=0.197]


Epoch 2 Loss: 10.8605
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.09it/s, loss=0.181] 


Epoch 3 Loss: 7.5890
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.0377]


Epoch 4 Loss: 5.5106
Test Metrics: Precision=0.9322, Recall=0.9322, F1=0.9322

Fine-tuning roberta-large (large) with Train Size 375, Split 4...


Map: 100%|██████████| 375/375 [00:00<00:00, 9239.21 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7257.42 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9166.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s, loss=0.259]


Epoch 1 Loss: 26.9895
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.16it/s, loss=0.283]


Epoch 2 Loss: 12.8534
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.138] 


Epoch 3 Loss: 8.6737
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.11it/s, loss=0.392] 


Epoch 4 Loss: 7.0792
Test Metrics: Precision=0.9279, Recall=0.9279, F1=0.9279

Fine-tuning roberta-large (large) with Train Size 375, Split 5...


Map: 100%|██████████| 375/375 [00:00<00:00, 9072.61 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7253.74 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9540.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.06it/s, loss=0.249]


Epoch 1 Loss: 26.5495
Epoch 2/4


Training Epoch 2: 100%|██████████| 47/47 [00:23<00:00,  2.02it/s, loss=0.108]


Epoch 2 Loss: 12.8461
Epoch 3/4


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.04it/s, loss=0.226] 


Epoch 3 Loss: 8.4056
Epoch 4/4


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.08it/s, loss=0.083] 


Epoch 4 Loss: 5.9434
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 8773.63 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7051.59 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9415.36 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 48/48 [00:21<00:00,  2.23it/s, loss=0.473]


Epoch 1 Loss: 25.1871
Epoch 2/4


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.18it/s, loss=0.21]  


Epoch 2 Loss: 11.4290
Epoch 3/4


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.18it/s, loss=0.108] 


Epoch 3 Loss: 7.5489
Epoch 4/4


Training Epoch 4: 100%|██████████| 48/48 [00:21<00:00,  2.19it/s, loss=0.102] 


Epoch 4 Loss: 4.9200
Test Metrics: Precision=0.9323, Recall=0.9323, F1=0.9323

Fine-tuning roberta-large (large) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 8840.20 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7631.30 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9301.23 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.16it/s, loss=0.337]


Epoch 1 Loss: 25.7338
Epoch 2/4


Training Epoch 2: 100%|██████████| 48/48 [00:21<00:00,  2.21it/s, loss=0.325]


Epoch 2 Loss: 12.0960
Epoch 3/4


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.14it/s, loss=0.396] 


Epoch 3 Loss: 8.5646
Epoch 4/4


Training Epoch 4: 100%|██████████| 48/48 [00:22<00:00,  2.13it/s, loss=0.204] 


Epoch 4 Loss: 5.8503
Test Metrics: Precision=0.9297, Recall=0.9297, F1=0.9297

Fine-tuning roberta-large (large) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 8869.82 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8322.25 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9545.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s, loss=0.376]


Epoch 1 Loss: 24.6148
Epoch 2/4


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.09it/s, loss=0.169]


Epoch 2 Loss: 11.6077
Epoch 3/4


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.13it/s, loss=0.4]   


Epoch 3 Loss: 7.9377
Epoch 4/4


Training Epoch 4: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s, loss=0.101] 


Epoch 4 Loss: 5.4125
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 9242.73 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7639.71 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9053.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 48/48 [00:21<00:00,  2.18it/s, loss=0.201]


Epoch 1 Loss: 24.9683
Epoch 2/4


Training Epoch 2: 100%|██████████| 48/48 [00:21<00:00,  2.18it/s, loss=0.0915]


Epoch 2 Loss: 11.5819
Epoch 3/4


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.10it/s, loss=0.36]  


Epoch 3 Loss: 7.9401
Epoch 4/4


Training Epoch 4: 100%|██████████| 48/48 [00:22<00:00,  2.15it/s, loss=0.15]  


Epoch 4 Loss: 5.8773
Test Metrics: Precision=0.9313, Recall=0.9313, F1=0.9313

Fine-tuning roberta-large (large) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 8768.42 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6825.26 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9457.55 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 48/48 [00:24<00:00,  1.94it/s, loss=0.275]


Epoch 1 Loss: 23.5398
Epoch 2/4


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  2.00it/s, loss=0.233]


Epoch 2 Loss: 12.4343
Epoch 3/4


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.12it/s, loss=0.0941]


Epoch 3 Loss: 8.4441
Epoch 4/4


Training Epoch 4: 100%|██████████| 48/48 [00:23<00:00,  2.01it/s, loss=0.0224]


Epoch 4 Loss: 6.2962
Test Metrics: Precision=0.9364, Recall=0.9364, F1=0.9364

Fine-tuning roberta-large (large) with Train Size 385, Split 1...


Map: 100%|██████████| 385/385 [00:00<00:00, 8831.51 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7576.81 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9159.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.303]


Epoch 1 Loss: 26.5540
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=1.09] 


Epoch 2 Loss: 14.2333
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:21<00:00,  2.26it/s, loss=0.202]


Epoch 3 Loss: 11.3896
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.20it/s, loss=0.0256]


Epoch 4 Loss: 8.8771
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 385, Split 2...


Map: 100%|██████████| 385/385 [00:00<00:00, 8784.29 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 6959.18 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9468.04 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.602]


Epoch 1 Loss: 23.8866
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.14it/s, loss=0.0028]


Epoch 2 Loss: 11.4837
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.0667]


Epoch 3 Loss: 7.4702
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.0765]


Epoch 4 Loss: 5.2590
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning roberta-large (large) with Train Size 385, Split 3...


Map: 100%|██████████| 385/385 [00:00<00:00, 8607.90 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7601.42 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9177.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.11it/s, loss=0.0391]


Epoch 1 Loss: 25.7436
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:23<00:00,  2.11it/s, loss=0.359]


Epoch 2 Loss: 11.4631
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.0269]


Epoch 3 Loss: 7.8411
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.14it/s, loss=0.0497]


Epoch 4 Loss: 5.7191
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning roberta-large (large) with Train Size 385, Split 4...


Map: 100%|██████████| 385/385 [00:00<00:00, 8893.97 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 6944.96 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9161.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.17it/s, loss=0.044]


Epoch 1 Loss: 27.7389
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:23<00:00,  2.13it/s, loss=0.809]


Epoch 2 Loss: 12.9940
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.115] 


Epoch 3 Loss: 10.0244
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.22it/s, loss=0.0319]


Epoch 4 Loss: 7.1953
Test Metrics: Precision=0.9313, Recall=0.9313, F1=0.9313

Fine-tuning roberta-large (large) with Train Size 385, Split 5...


Map: 100%|██████████| 385/385 [00:00<00:00, 9031.97 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7006.73 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9580.60 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.05it/s, loss=0.213]


Epoch 1 Loss: 29.0224
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:24<00:00,  2.03it/s, loss=0.0681]


Epoch 2 Loss: 14.7657
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:24<00:00,  2.04it/s, loss=0.304] 


Epoch 3 Loss: 9.6788
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.09it/s, loss=0.178] 


Epoch 4 Loss: 6.8216
Test Metrics: Precision=0.9348, Recall=0.9348, F1=0.9348

Fine-tuning roberta-large (large) with Train Size 390, Split 1...


Map: 100%|██████████| 390/390 [00:00<00:00, 9001.94 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7551.72 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9223.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.17it/s, loss=0.371]


Epoch 1 Loss: 23.2630
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.396]


Epoch 2 Loss: 11.2637
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.23it/s, loss=0.133] 


Epoch 3 Loss: 7.2841
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.22it/s, loss=0.0686]


Epoch 4 Loss: 4.9524
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 390, Split 2...


Map: 100%|██████████| 390/390 [00:00<00:00, 8803.26 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7012.53 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9020.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.15it/s, loss=0.249]


Epoch 1 Loss: 25.4740
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.16it/s, loss=0.316]


Epoch 2 Loss: 11.5140
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.197] 


Epoch 3 Loss: 7.9968
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.13it/s, loss=0.164] 


Epoch 4 Loss: 5.3812
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 390, Split 3...


Map: 100%|██████████| 390/390 [00:00<00:00, 8726.01 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7809.13 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9684.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.11it/s, loss=0.375]


Epoch 1 Loss: 27.0002
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:23<00:00,  2.08it/s, loss=0.172]


Epoch 2 Loss: 12.3546
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.16it/s, loss=0.174] 


Epoch 3 Loss: 8.3276
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:24<00:00,  2.03it/s, loss=0.18]  


Epoch 4 Loss: 6.3155
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning roberta-large (large) with Train Size 390, Split 4...


Map: 100%|██████████| 390/390 [00:00<00:00, 8938.49 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7145.63 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9396.33 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.12it/s, loss=0.433]


Epoch 1 Loss: 24.6608
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.18it/s, loss=0.182]


Epoch 2 Loss: 11.3901
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:23<00:00,  2.10it/s, loss=0.145] 


Epoch 3 Loss: 7.6819
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.13it/s, loss=0.103] 


Epoch 4 Loss: 5.6792
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 390, Split 5...


Map: 100%|██████████| 390/390 [00:00<00:00, 9181.46 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7627.96 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9372.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 49/49 [00:24<00:00,  2.00it/s, loss=0.395]


Epoch 1 Loss: 29.2705
Epoch 2/4


Training Epoch 2: 100%|██████████| 49/49 [00:24<00:00,  2.01it/s, loss=0.207]


Epoch 2 Loss: 12.7919
Epoch 3/4


Training Epoch 3: 100%|██████████| 49/49 [00:24<00:00,  2.01it/s, loss=0.296] 


Epoch 3 Loss: 8.6960
Epoch 4/4


Training Epoch 4: 100%|██████████| 49/49 [00:24<00:00,  2.03it/s, loss=0.132] 


Epoch 4 Loss: 6.3362
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 395, Split 1...


Map: 100%|██████████| 395/395 [00:00<00:00, 9197.06 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 6947.56 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9361.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.25it/s, loss=0.254]


Epoch 1 Loss: 26.7636
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, loss=0.354] 


Epoch 2 Loss: 20.8349
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, loss=0.186]


Epoch 3 Loss: 18.0534
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s, loss=0.165]


Epoch 4 Loss: 13.1681
Test Metrics: Precision=0.9158, Recall=0.9158, F1=0.9158

Fine-tuning roberta-large (large) with Train Size 395, Split 2...


Map: 100%|██████████| 395/395 [00:00<00:00, 8793.84 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7581.69 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9388.47 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.13it/s, loss=0.0731]


Epoch 1 Loss: 24.4498
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.20it/s, loss=0.178] 


Epoch 2 Loss: 10.9573
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.182] 


Epoch 3 Loss: 7.9113
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.104] 


Epoch 4 Loss: 5.1392
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 395, Split 3...


Map: 100%|██████████| 395/395 [00:00<00:00, 8680.04 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7445.40 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9686.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.285]


Epoch 1 Loss: 23.9433
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s, loss=0.238]


Epoch 2 Loss: 11.0170
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.143] 


Epoch 3 Loss: 6.8497
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.156] 


Epoch 4 Loss: 4.6111
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 395, Split 4...


Map: 100%|██████████| 395/395 [00:00<00:00, 9188.44 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 8159.92 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9216.16 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.503]


Epoch 1 Loss: 26.4819
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.00445]


Epoch 2 Loss: 11.6438
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.111] 


Epoch 3 Loss: 8.1691
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.111] 


Epoch 4 Loss: 6.0072
Test Metrics: Precision=0.9323, Recall=0.9323, F1=0.9323

Fine-tuning roberta-large (large) with Train Size 395, Split 5...


Map: 100%|██████████| 395/395 [00:00<00:00, 8777.67 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 8045.99 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9193.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.01it/s, loss=1.16] 


Epoch 1 Loss: 27.0762
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s, loss=0.224]


Epoch 2 Loss: 14.4003
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s, loss=0.261] 


Epoch 3 Loss: 9.4364
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.0643]


Epoch 4 Loss: 6.8388
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 8986.96 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7481.31 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9341.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, loss=0.319]


Epoch 1 Loss: 24.2986
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.345] 


Epoch 2 Loss: 11.1237
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, loss=0.0938]


Epoch 3 Loss: 7.3664
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.152] 


Epoch 4 Loss: 5.0901
Test Metrics: Precision=0.9360, Recall=0.9360, F1=0.9360

Fine-tuning roberta-large (large) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 8912.82 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7916.96 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9123.11 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.14it/s, loss=0.473]


Epoch 1 Loss: 27.4676
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.25it/s, loss=0.27] 


Epoch 2 Loss: 14.0937
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.11it/s, loss=0.189] 


Epoch 3 Loss: 9.9286
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.13it/s, loss=0.127] 


Epoch 4 Loss: 7.4150
Test Metrics: Precision=0.9285, Recall=0.9285, F1=0.9285

Fine-tuning roberta-large (large) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8767.77 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6959.91 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9638.64 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.09it/s, loss=0.327]


Epoch 1 Loss: 24.2664
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.446]


Epoch 2 Loss: 10.9147
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.11]  


Epoch 3 Loss: 7.3168
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.0951]


Epoch 4 Loss: 4.8227
Test Metrics: Precision=0.9352, Recall=0.9352, F1=0.9352

Fine-tuning roberta-large (large) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 9246.85 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7051.92 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9323.08 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.20it/s, loss=0.378]


Epoch 1 Loss: 26.0778
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, loss=0.292] 


Epoch 2 Loss: 12.5949
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.28]  


Epoch 3 Loss: 8.4141
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.11]  


Epoch 4 Loss: 6.2445
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 8696.69 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6904.63 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9196.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, loss=0.369]


Epoch 1 Loss: 25.9684
Epoch 2/4


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, loss=0.164]


Epoch 2 Loss: 11.9650
Epoch 3/4


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, loss=0.161] 


Epoch 3 Loss: 7.6587
Epoch 4/4


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.10it/s, loss=0.231] 


Epoch 4 Loss: 5.5816
Test Metrics: Precision=0.9369, Recall=0.9369, F1=0.9369

Fine-tuning roberta-large (large) with Train Size 405, Split 1...


Map: 100%|██████████| 405/405 [00:00<00:00, 8891.63 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 6836.61 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9504.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 51/51 [00:23<00:00,  2.19it/s, loss=0.328]


Epoch 1 Loss: 27.3341
Epoch 2/4


Training Epoch 2: 100%|██████████| 51/51 [00:23<00:00,  2.19it/s, loss=0.355]


Epoch 2 Loss: 12.5045
Epoch 3/4


Training Epoch 3: 100%|██████████| 51/51 [00:23<00:00,  2.17it/s, loss=0.0625]


Epoch 3 Loss: 8.2857
Epoch 4/4


Training Epoch 4: 100%|██████████| 51/51 [00:23<00:00,  2.19it/s, loss=0.0713]


Epoch 4 Loss: 5.8559
Test Metrics: Precision=0.9336, Recall=0.9336, F1=0.9336

Fine-tuning roberta-large (large) with Train Size 405, Split 2...


Map: 100%|██████████| 405/405 [00:00<00:00, 8956.23 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7862.14 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9314.02 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 51/51 [00:23<00:00,  2.15it/s, loss=0.58] 


Epoch 1 Loss: 28.0549
Epoch 2/4


Training Epoch 2: 100%|██████████| 51/51 [00:24<00:00,  2.12it/s, loss=0.393]


Epoch 2 Loss: 14.6743
Epoch 3/4


Training Epoch 3: 100%|██████████| 51/51 [00:23<00:00,  2.21it/s, loss=0.183]


Epoch 3 Loss: 10.9036
Epoch 4/4


Training Epoch 4: 100%|██████████| 51/51 [00:23<00:00,  2.18it/s, loss=0.208] 


Epoch 4 Loss: 8.2883
Test Metrics: Precision=0.9267, Recall=0.9267, F1=0.9267

Fine-tuning roberta-large (large) with Train Size 405, Split 3...


Map: 100%|██████████| 405/405 [00:00<00:00, 8694.30 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 6221.41 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9312.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 51/51 [00:24<00:00,  2.11it/s, loss=0.19] 


Epoch 1 Loss: 25.2042
Epoch 2/4


Training Epoch 2: 100%|██████████| 51/51 [00:24<00:00,  2.10it/s, loss=0.293]


Epoch 2 Loss: 11.1481
Epoch 3/4


Training Epoch 3: 100%|██████████| 51/51 [00:24<00:00,  2.08it/s, loss=0.172] 


Epoch 3 Loss: 7.0423
Epoch 4/4


Training Epoch 4: 100%|██████████| 51/51 [00:24<00:00,  2.10it/s, loss=0.0569]


Epoch 4 Loss: 4.7630
Test Metrics: Precision=0.9359, Recall=0.9359, F1=0.9359

Fine-tuning roberta-large (large) with Train Size 405, Split 4...


Map: 100%|██████████| 405/405 [00:00<00:00, 9081.10 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7043.11 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9376.90 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 51/51 [00:23<00:00,  2.13it/s, loss=0.277]


Epoch 1 Loss: 25.1749
Epoch 2/4


Training Epoch 2: 100%|██████████| 51/51 [00:24<00:00,  2.09it/s, loss=0.257]


Epoch 2 Loss: 12.2988
Epoch 3/4


Training Epoch 3: 100%|██████████| 51/51 [00:23<00:00,  2.16it/s, loss=0.128] 


Epoch 3 Loss: 8.1886
Epoch 4/4


Training Epoch 4: 100%|██████████| 51/51 [00:23<00:00,  2.18it/s, loss=0.151] 


Epoch 4 Loss: 5.8339
Test Metrics: Precision=0.9297, Recall=0.9297, F1=0.9297

Fine-tuning roberta-large (large) with Train Size 405, Split 5...


Map: 100%|██████████| 405/405 [00:00<00:00, 8924.71 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7438.83 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9388.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 51/51 [00:25<00:00,  2.02it/s, loss=0.307]


Epoch 1 Loss: 25.4137
Epoch 2/4


Training Epoch 2: 100%|██████████| 51/51 [00:25<00:00,  1.98it/s, loss=0.196] 


Epoch 2 Loss: 12.3336
Epoch 3/4


Training Epoch 3: 100%|██████████| 51/51 [00:24<00:00,  2.08it/s, loss=0.492] 


Epoch 3 Loss: 8.3844
Epoch 4/4


Training Epoch 4: 100%|██████████| 51/51 [00:24<00:00,  2.06it/s, loss=0.0474]


Epoch 4 Loss: 6.6724
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 410, Split 1...


Map: 100%|██████████| 410/410 [00:00<00:00, 2440.75 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7509.62 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9179.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.433]


Epoch 1 Loss: 26.1108
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.16it/s, loss=0.278]


Epoch 2 Loss: 12.7555
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.23it/s, loss=0.313] 


Epoch 3 Loss: 8.7960
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.19it/s, loss=0.128] 


Epoch 4 Loss: 5.9567
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 410, Split 2...


Map: 100%|██████████| 410/410 [00:00<00:00, 9070.77 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 8279.96 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9033.63 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.17it/s, loss=0.801]


Epoch 1 Loss: 27.9664
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.34] 


Epoch 2 Loss: 13.4694
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.184] 


Epoch 3 Loss: 9.3941
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.23it/s, loss=0.0653]


Epoch 4 Loss: 6.5227
Test Metrics: Precision=0.9329, Recall=0.9329, F1=0.9329

Fine-tuning roberta-large (large) with Train Size 410, Split 3...


Map: 100%|██████████| 410/410 [00:00<00:00, 8813.23 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7232.01 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9435.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.15it/s, loss=0.402]


Epoch 1 Loss: 27.6623
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:25<00:00,  2.07it/s, loss=0.264]


Epoch 2 Loss: 12.4333
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:24<00:00,  2.13it/s, loss=0.0296]


Epoch 3 Loss: 7.4360
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.11it/s, loss=0.0314]


Epoch 4 Loss: 4.9870
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 410, Split 4...


Map: 100%|██████████| 410/410 [00:00<00:00, 8758.16 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7659.81 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9071.02 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.16it/s, loss=0.298]


Epoch 1 Loss: 26.6271
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.15it/s, loss=0.152]


Epoch 2 Loss: 11.9161
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.25it/s, loss=0.053] 


Epoch 3 Loss: 8.1996
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.14it/s, loss=0.14]  


Epoch 4 Loss: 5.9551
Test Metrics: Precision=0.9312, Recall=0.9312, F1=0.9312

Fine-tuning roberta-large (large) with Train Size 410, Split 5...


Map: 100%|██████████| 410/410 [00:00<00:00, 9060.59 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 8141.58 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9223.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:25<00:00,  2.07it/s, loss=0.552]


Epoch 1 Loss: 29.4036
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.09it/s, loss=0.054]


Epoch 2 Loss: 13.9020
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:25<00:00,  2.01it/s, loss=0.163] 


Epoch 3 Loss: 9.2953
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:25<00:00,  2.02it/s, loss=0.161] 


Epoch 4 Loss: 7.0521
Test Metrics: Precision=0.9358, Recall=0.9358, F1=0.9358

Fine-tuning roberta-large (large) with Train Size 415, Split 1...


Map: 100%|██████████| 415/415 [00:00<00:00, 8863.21 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7584.80 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9356.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.341]


Epoch 1 Loss: 27.2613
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.135]


Epoch 2 Loss: 11.8361
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.17it/s, loss=0.127] 


Epoch 3 Loss: 8.2335
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.248] 


Epoch 4 Loss: 5.5027
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning roberta-large (large) with Train Size 415, Split 2...


Map: 100%|██████████| 415/415 [00:00<00:00, 9163.75 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 6849.53 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9362.34 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.21it/s, loss=0.704]


Epoch 1 Loss: 27.4820
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.15it/s, loss=0.186]


Epoch 2 Loss: 13.0875
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.0952]


Epoch 3 Loss: 8.7136
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.10it/s, loss=0.122] 


Epoch 4 Loss: 6.1731
Test Metrics: Precision=0.9334, Recall=0.9334, F1=0.9334

Fine-tuning roberta-large (large) with Train Size 415, Split 3...


Map: 100%|██████████| 415/415 [00:00<00:00, 9060.01 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7052.96 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9383.23 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:25<00:00,  2.08it/s, loss=0.35] 


Epoch 1 Loss: 25.3676
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.14it/s, loss=0.284]


Epoch 2 Loss: 11.4219
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:25<00:00,  2.07it/s, loss=0.141] 


Epoch 3 Loss: 7.2301
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.09it/s, loss=0.138] 


Epoch 4 Loss: 4.9515
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 415, Split 4...


Map: 100%|██████████| 415/415 [00:00<00:00, 8704.75 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7537.51 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9208.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.14it/s, loss=0.413]


Epoch 1 Loss: 26.9050
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.15it/s, loss=0.445]


Epoch 2 Loss: 11.5672
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:24<00:00,  2.08it/s, loss=0.212] 


Epoch 3 Loss: 8.3830
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.11it/s, loss=0.0598]


Epoch 4 Loss: 6.0416
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning roberta-large (large) with Train Size 415, Split 5...


Map: 100%|██████████| 415/415 [00:00<00:00, 8779.34 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7061.40 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9601.49 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 52/52 [00:26<00:00,  1.96it/s, loss=0.274]


Epoch 1 Loss: 27.4507
Epoch 2/4


Training Epoch 2: 100%|██████████| 52/52 [00:26<00:00,  1.99it/s, loss=0.238]


Epoch 2 Loss: 12.3075
Epoch 3/4


Training Epoch 3: 100%|██████████| 52/52 [00:26<00:00,  1.96it/s, loss=0.0827]


Epoch 3 Loss: 8.7767
Epoch 4/4


Training Epoch 4: 100%|██████████| 52/52 [00:26<00:00,  1.98it/s, loss=0.1]   


Epoch 4 Loss: 6.0648
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 9058.18 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8180.97 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9150.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.17it/s, loss=0.794]


Epoch 1 Loss: 25.9182
Epoch 2/4


Training Epoch 2: 100%|██████████| 53/53 [00:23<00:00,  2.23it/s, loss=0.243]


Epoch 2 Loss: 12.3591
Epoch 3/4


Training Epoch 3: 100%|██████████| 53/53 [00:24<00:00,  2.19it/s, loss=0.0782]


Epoch 3 Loss: 8.0457
Epoch 4/4


Training Epoch 4: 100%|██████████| 53/53 [00:23<00:00,  2.22it/s, loss=0.0735]


Epoch 4 Loss: 5.9930
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 9220.61 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7764.49 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9269.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 53/53 [00:23<00:00,  2.23it/s, loss=0.219]


Epoch 1 Loss: 28.1124
Epoch 2/4


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.14it/s, loss=0.339]


Epoch 2 Loss: 13.5495
Epoch 3/4


Training Epoch 3: 100%|██████████| 53/53 [00:23<00:00,  2.23it/s, loss=0.114] 


Epoch 3 Loss: 8.8938
Epoch 4/4


Training Epoch 4: 100%|██████████| 53/53 [00:24<00:00,  2.18it/s, loss=0.0369]


Epoch 4 Loss: 6.2284
Test Metrics: Precision=0.9331, Recall=0.9331, F1=0.9331

Fine-tuning roberta-large (large) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8775.39 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7378.15 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9731.90 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 53/53 [00:25<00:00,  2.05it/s, loss=0.247]


Epoch 1 Loss: 26.1990
Epoch 2/4


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.07it/s, loss=0.34] 


Epoch 2 Loss: 12.4356
Epoch 3/4


Training Epoch 3: 100%|██████████| 53/53 [00:24<00:00,  2.12it/s, loss=0.0953]


Epoch 3 Loss: 7.9064
Epoch 4/4


Training Epoch 4: 100%|██████████| 53/53 [00:25<00:00,  2.11it/s, loss=0.0837]


Epoch 4 Loss: 5.2613
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 9069.93 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7488.87 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9359.62 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.15it/s, loss=0.492]


Epoch 1 Loss: 27.6256
Epoch 2/4


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.13it/s, loss=0.226]


Epoch 2 Loss: 13.6374
Epoch 3/4


Training Epoch 3: 100%|██████████| 53/53 [00:24<00:00,  2.18it/s, loss=0.22]  


Epoch 3 Loss: 9.8279
Epoch 4/4


Training Epoch 4: 100%|██████████| 53/53 [00:24<00:00,  2.14it/s, loss=0.0895]


Epoch 4 Loss: 7.5654
Test Metrics: Precision=0.9294, Recall=0.9294, F1=0.9294

Fine-tuning roberta-large (large) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 8919.71 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7520.52 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9197.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  2.01it/s, loss=0.463]


Epoch 1 Loss: 31.6609
Epoch 2/4


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  2.02it/s, loss=0.353]


Epoch 2 Loss: 15.3093
Epoch 3/4


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  2.04it/s, loss=0.212]


Epoch 3 Loss: 10.0713
Epoch 4/4


Training Epoch 4: 100%|██████████| 53/53 [00:26<00:00,  1.97it/s, loss=0.0619]


Epoch 4 Loss: 7.5230
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 425, Split 1...


Map: 100%|██████████| 425/425 [00:00<00:00, 9100.08 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 8429.07 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9285.89 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:23<00:00,  2.27it/s, loss=0.762]


Epoch 1 Loss: 31.5488
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:23<00:00,  2.25it/s, loss=0.319] 


Epoch 2 Loss: 13.5286
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:23<00:00,  2.26it/s, loss=0.0714]


Epoch 3 Loss: 9.4805
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:23<00:00,  2.25it/s, loss=0.261] 


Epoch 4 Loss: 6.9146
Test Metrics: Precision=0.9337, Recall=0.9337, F1=0.9337

Fine-tuning roberta-large (large) with Train Size 425, Split 2...


Map: 100%|██████████| 425/425 [00:00<00:00, 9190.40 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 6417.12 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9215.09 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:24<00:00,  2.20it/s, loss=0.145]


Epoch 1 Loss: 26.8446
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.152]


Epoch 2 Loss: 12.1189
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.20it/s, loss=0.0947]


Epoch 3 Loss: 7.9416
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.000955]


Epoch 4 Loss: 5.3907
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 425, Split 3...


Map: 100%|██████████| 425/425 [00:00<00:00, 8654.14 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 6696.39 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9412.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:25<00:00,  2.08it/s, loss=0.399]


Epoch 1 Loss: 25.1019
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.21it/s, loss=0.115] 


Epoch 2 Loss: 11.8957
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:25<00:00,  2.10it/s, loss=0.233] 


Epoch 3 Loss: 8.0170
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.19it/s, loss=0.107] 


Epoch 4 Loss: 5.3336
Test Metrics: Precision=0.9354, Recall=0.9354, F1=0.9354

Fine-tuning roberta-large (large) with Train Size 425, Split 4...


Map: 100%|██████████| 425/425 [00:00<00:00, 9029.65 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 8421.30 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9102.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:25<00:00,  2.11it/s, loss=0.683]


Epoch 1 Loss: 26.6364
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.16it/s, loss=0.195]


Epoch 2 Loss: 13.2594
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:25<00:00,  2.11it/s, loss=0.283] 


Epoch 3 Loss: 9.3959
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.16it/s, loss=0.0626]


Epoch 4 Loss: 7.2291
Test Metrics: Precision=0.9298, Recall=0.9298, F1=0.9298

Fine-tuning roberta-large (large) with Train Size 425, Split 5...


Map: 100%|██████████| 425/425 [00:00<00:00, 8769.82 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 7574.97 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9531.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:27<00:00,  1.96it/s, loss=0.136]


Epoch 1 Loss: 28.0860
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:26<00:00,  2.02it/s, loss=0.462]


Epoch 2 Loss: 13.5567
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:26<00:00,  2.05it/s, loss=0.0917]


Epoch 3 Loss: 9.4310
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:26<00:00,  2.05it/s, loss=0.0933]


Epoch 4 Loss: 6.0742
Test Metrics: Precision=0.9367, Recall=0.9367, F1=0.9367

Fine-tuning roberta-large (large) with Train Size 430, Split 1...


Map: 100%|██████████| 430/430 [00:00<00:00, 8701.33 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7131.05 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9310.64 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.418]


Epoch 1 Loss: 26.9572
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.127]


Epoch 2 Loss: 12.9481
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.23it/s, loss=0.184] 


Epoch 3 Loss: 8.5625
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:23<00:00,  2.25it/s, loss=0.129] 


Epoch 4 Loss: 5.9923
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 430, Split 2...


Map: 100%|██████████| 430/430 [00:00<00:00, 9072.51 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 6668.58 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9453.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:25<00:00,  2.14it/s, loss=0.357]


Epoch 1 Loss: 28.2894
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.15it/s, loss=0.202]


Epoch 2 Loss: 12.9720
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:25<00:00,  2.11it/s, loss=0.133] 


Epoch 3 Loss: 8.5956
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.105] 


Epoch 4 Loss: 6.2394
Test Metrics: Precision=0.9321, Recall=0.9321, F1=0.9321

Fine-tuning roberta-large (large) with Train Size 430, Split 3...


Map: 100%|██████████| 430/430 [00:00<00:00, 8622.83 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7286.04 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 3579.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:26<00:00,  2.06it/s, loss=0.299]


Epoch 1 Loss: 27.5621
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.08it/s, loss=0.202]


Epoch 2 Loss: 12.9460
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:26<00:00,  2.06it/s, loss=0.183] 


Epoch 3 Loss: 8.6892
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:26<00:00,  2.06it/s, loss=0.103] 


Epoch 4 Loss: 7.1501
Test Metrics: Precision=0.9287, Recall=0.9287, F1=0.9287

Fine-tuning roberta-large (large) with Train Size 430, Split 4...


Map: 100%|██████████| 430/430 [00:00<00:00, 8788.81 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 6817.82 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9497.65 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:25<00:00,  2.15it/s, loss=0.291]


Epoch 1 Loss: 29.6490
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.10it/s, loss=0.153]


Epoch 2 Loss: 13.6946
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:25<00:00,  2.11it/s, loss=0.183] 


Epoch 3 Loss: 9.8659
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:25<00:00,  2.13it/s, loss=0.165] 


Epoch 4 Loss: 7.1539
Test Metrics: Precision=0.9290, Recall=0.9290, F1=0.9290

Fine-tuning roberta-large (large) with Train Size 430, Split 5...


Map: 100%|██████████| 430/430 [00:00<00:00, 8726.80 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7162.35 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9631.73 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 54/54 [00:27<00:00,  1.98it/s, loss=0.294]


Epoch 1 Loss: 30.2494
Epoch 2/4


Training Epoch 2: 100%|██████████| 54/54 [00:27<00:00,  1.96it/s, loss=0.187] 


Epoch 2 Loss: 13.8670
Epoch 3/4


Training Epoch 3: 100%|██████████| 54/54 [00:27<00:00,  1.97it/s, loss=0.25]  


Epoch 3 Loss: 9.2408
Epoch 4/4


Training Epoch 4: 100%|██████████| 54/54 [00:26<00:00,  2.01it/s, loss=0.1]   


Epoch 4 Loss: 6.8324
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 435, Split 1...


Map: 100%|██████████| 435/435 [00:00<00:00, 8849.77 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7586.06 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9151.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:24<00:00,  2.21it/s, loss=0.444]


Epoch 1 Loss: 26.5033
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.20it/s, loss=0.122]


Epoch 2 Loss: 12.9661
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:24<00:00,  2.20it/s, loss=0.113] 


Epoch 3 Loss: 8.4366
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:24<00:00,  2.22it/s, loss=0.064] 


Epoch 4 Loss: 5.8724
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 435, Split 2...


Map: 100%|██████████| 435/435 [00:00<00:00, 9137.87 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7444.45 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9163.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.18it/s, loss=0.309]


Epoch 1 Loss: 28.4319
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:24<00:00,  2.22it/s, loss=0.304]


Epoch 2 Loss: 14.1718
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.17it/s, loss=0.137] 


Epoch 3 Loss: 9.7651
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:25<00:00,  2.17it/s, loss=0.134] 


Epoch 4 Loss: 6.8073
Test Metrics: Precision=0.9325, Recall=0.9325, F1=0.9325

Fine-tuning roberta-large (large) with Train Size 435, Split 3...


Map: 100%|██████████| 435/435 [00:00<00:00, 8582.31 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7761.45 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9432.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.14it/s, loss=0.374]


Epoch 1 Loss: 27.4317
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.08it/s, loss=0.163]


Epoch 2 Loss: 12.4561
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.09it/s, loss=0.146] 


Epoch 3 Loss: 8.3164
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:25<00:00,  2.13it/s, loss=0.124] 


Epoch 4 Loss: 6.0020
Test Metrics: Precision=0.9329, Recall=0.9329, F1=0.9329

Fine-tuning roberta-large (large) with Train Size 435, Split 4...


Map: 100%|██████████| 435/435 [00:00<00:00, 9059.56 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7153.87 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9417.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.14it/s, loss=0.235]


Epoch 1 Loss: 27.7842
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.08it/s, loss=0.186]


Epoch 2 Loss: 12.5144
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.13it/s, loss=0.0997]


Epoch 3 Loss: 8.4388
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:25<00:00,  2.17it/s, loss=0.0523]


Epoch 4 Loss: 5.8118
Test Metrics: Precision=0.9317, Recall=0.9317, F1=0.9317

Fine-tuning roberta-large (large) with Train Size 435, Split 5...


Map: 100%|██████████| 435/435 [00:00<00:00, 8539.05 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7503.23 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9588.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.04it/s, loss=0.279]


Epoch 1 Loss: 29.6078
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.94it/s, loss=0.26] 


Epoch 2 Loss: 14.2696
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  1.97it/s, loss=0.116] 


Epoch 3 Loss: 9.5279
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:27<00:00,  1.98it/s, loss=0.114] 


Epoch 4 Loss: 6.8558
Test Metrics: Precision=0.9331, Recall=0.9331, F1=0.9331

Fine-tuning roberta-large (large) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 8848.45 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7897.86 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9423.43 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.18it/s, loss=0.165]


Epoch 1 Loss: 26.4347
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:24<00:00,  2.23it/s, loss=0.289]


Epoch 2 Loss: 13.4236
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.19it/s, loss=0.208] 


Epoch 3 Loss: 8.9749
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:24<00:00,  2.22it/s, loss=0.0447]


Epoch 4 Loss: 6.3138
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 9041.40 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7830.01 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9512.99 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.369]


Epoch 1 Loss: 28.1395
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.15it/s, loss=0.227]


Epoch 2 Loss: 13.2394
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.10it/s, loss=0.109] 


Epoch 3 Loss: 8.6419
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:25<00:00,  2.15it/s, loss=0.102] 


Epoch 4 Loss: 6.3461
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8750.44 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7385.67 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9726.49 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  2.01it/s, loss=0.237]


Epoch 1 Loss: 26.3333
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.12] 


Epoch 2 Loss: 12.1963
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.06it/s, loss=0.0754]


Epoch 3 Loss: 8.0839
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.104] 


Epoch 4 Loss: 5.1140
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 8565.68 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7079.13 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9645.87 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.227]


Epoch 1 Loss: 26.9278
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.06it/s, loss=0.378]


Epoch 2 Loss: 13.6236
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.05it/s, loss=0.113] 


Epoch 3 Loss: 9.7310
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:27<00:00,  2.03it/s, loss=0.0924]


Epoch 4 Loss: 7.2485
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning roberta-large (large) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 8760.87 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7680.92 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9420.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  2.03it/s, loss=0.309]


Epoch 1 Loss: 27.2495
Epoch 2/4


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.00it/s, loss=0.168]


Epoch 2 Loss: 12.4658
Epoch 3/4


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  1.99it/s, loss=0.234] 


Epoch 3 Loss: 8.3698
Epoch 4/4


Training Epoch 4: 100%|██████████| 55/55 [00:27<00:00,  2.01it/s, loss=0.152] 


Epoch 4 Loss: 5.7582
Test Metrics: Precision=0.9379, Recall=0.9379, F1=0.9379

Fine-tuning roberta-large (large) with Train Size 445, Split 1...


Map: 100%|██████████| 445/445 [00:00<00:00, 9132.24 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7596.83 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9570.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 56/56 [00:25<00:00,  2.21it/s, loss=0.183]


Epoch 1 Loss: 28.5755
Epoch 2/4


Training Epoch 2: 100%|██████████| 56/56 [00:24<00:00,  2.27it/s, loss=0.1]  


Epoch 2 Loss: 13.8507
Epoch 3/4


Training Epoch 3: 100%|██████████| 56/56 [00:24<00:00,  2.25it/s, loss=0.124] 


Epoch 3 Loss: 9.0905
Epoch 4/4


Training Epoch 4: 100%|██████████| 56/56 [00:25<00:00,  2.19it/s, loss=0.137] 


Epoch 4 Loss: 6.8684
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 445, Split 2...


Map: 100%|██████████| 445/445 [00:00<00:00, 8809.57 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7284.34 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9362.75 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 56/56 [00:26<00:00,  2.11it/s, loss=0.412]


Epoch 1 Loss: 28.3099
Epoch 2/4


Training Epoch 2: 100%|██████████| 56/56 [00:26<00:00,  2.14it/s, loss=0.301]


Epoch 2 Loss: 13.1932
Epoch 3/4


Training Epoch 3: 100%|██████████| 56/56 [00:26<00:00,  2.10it/s, loss=0.188] 


Epoch 3 Loss: 8.9429
Epoch 4/4


Training Epoch 4: 100%|██████████| 56/56 [00:26<00:00,  2.13it/s, loss=0.198] 


Epoch 4 Loss: 6.4434
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 445, Split 3...


Map: 100%|██████████| 445/445 [00:00<00:00, 8622.44 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 6763.41 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9531.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 56/56 [00:26<00:00,  2.09it/s, loss=0.367]


Epoch 1 Loss: 26.2775
Epoch 2/4


Training Epoch 2: 100%|██████████| 56/56 [00:26<00:00,  2.14it/s, loss=0.189]


Epoch 2 Loss: 12.3567
Epoch 3/4


Training Epoch 3: 100%|██████████| 56/56 [00:26<00:00,  2.12it/s, loss=0.139] 


Epoch 3 Loss: 7.9965
Epoch 4/4


Training Epoch 4: 100%|██████████| 56/56 [00:26<00:00,  2.11it/s, loss=0.0372]


Epoch 4 Loss: 6.5750
Test Metrics: Precision=0.9277, Recall=0.9277, F1=0.9277

Fine-tuning roberta-large (large) with Train Size 445, Split 4...


Map: 100%|██████████| 445/445 [00:00<00:00, 8833.33 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 8294.11 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9408.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 56/56 [00:26<00:00,  2.10it/s, loss=0.473]


Epoch 1 Loss: 27.5953
Epoch 2/4


Training Epoch 2: 100%|██████████| 56/56 [00:26<00:00,  2.12it/s, loss=0.203]


Epoch 2 Loss: 12.2834
Epoch 3/4


Training Epoch 3: 100%|██████████| 56/56 [00:25<00:00,  2.17it/s, loss=0.0494]


Epoch 3 Loss: 8.1711
Epoch 4/4


Training Epoch 4: 100%|██████████| 56/56 [00:26<00:00,  2.11it/s, loss=0.108] 


Epoch 4 Loss: 6.0327
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 445, Split 5...


Map: 100%|██████████| 445/445 [00:00<00:00, 8468.04 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7748.21 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9512.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 56/56 [00:28<00:00,  1.99it/s, loss=0.291]


Epoch 1 Loss: 27.6308
Epoch 2/4


Training Epoch 2: 100%|██████████| 56/56 [00:27<00:00,  2.00it/s, loss=0.249]


Epoch 2 Loss: 14.2449
Epoch 3/4


Training Epoch 3: 100%|██████████| 56/56 [00:28<00:00,  1.98it/s, loss=0.0749]


Epoch 3 Loss: 8.8365
Epoch 4/4


Training Epoch 4: 100%|██████████| 56/56 [00:28<00:00,  1.99it/s, loss=0.0911]


Epoch 4 Loss: 5.6191
Test Metrics: Precision=0.9380, Recall=0.9380, F1=0.9380

Fine-tuning roberta-large (large) with Train Size 450, Split 1...


Map: 100%|██████████| 450/450 [00:00<00:00, 8742.10 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7174.52 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9380.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:25<00:00,  2.20it/s, loss=0.156]


Epoch 1 Loss: 29.2872
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.0861]


Epoch 2 Loss: 13.3153
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:25<00:00,  2.24it/s, loss=0.0938]


Epoch 3 Loss: 9.7179
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.123] 


Epoch 4 Loss: 6.7936
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning roberta-large (large) with Train Size 450, Split 2...


Map: 100%|██████████| 450/450 [00:00<00:00, 9277.79 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7298.67 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9609.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.14it/s, loss=0.438]


Epoch 1 Loss: 29.0770
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:26<00:00,  2.11it/s, loss=0.089]


Epoch 2 Loss: 12.7473
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.17it/s, loss=0.306] 


Epoch 3 Loss: 8.8619
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.518] 


Epoch 4 Loss: 7.2418
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 450, Split 3...


Map: 100%|██████████| 450/450 [00:00<00:00, 9105.25 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 8671.09 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9571.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.15it/s, loss=0.475]


Epoch 1 Loss: 27.0395
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:25<00:00,  2.20it/s, loss=0.238]


Epoch 2 Loss: 12.9870
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:27<00:00,  2.10it/s, loss=0.0486]


Epoch 3 Loss: 8.6498
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:27<00:00,  2.10it/s, loss=0.203] 


Epoch 4 Loss: 6.2634
Test Metrics: Precision=0.9354, Recall=0.9354, F1=0.9354

Fine-tuning roberta-large (large) with Train Size 450, Split 4...


Map: 100%|██████████| 450/450 [00:00<00:00, 8598.64 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 6969.21 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9404.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.11it/s, loss=0.195]


Epoch 1 Loss: 29.0977
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:26<00:00,  2.13it/s, loss=0.0573]


Epoch 2 Loss: 14.1782
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.11it/s, loss=0.142] 


Epoch 3 Loss: 9.0572
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:27<00:00,  2.09it/s, loss=0.145] 


Epoch 4 Loss: 6.5907
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 450, Split 5...


Map: 100%|██████████| 450/450 [00:00<00:00, 8749.03 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7474.70 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9378.16 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:27<00:00,  2.08it/s, loss=0.398]


Epoch 1 Loss: 35.1932
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:28<00:00,  2.00it/s, loss=0.156]


Epoch 2 Loss: 15.7897
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:28<00:00,  2.00it/s, loss=0.129] 


Epoch 3 Loss: 11.2694
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:28<00:00,  1.99it/s, loss=0.116] 


Epoch 4 Loss: 8.2788
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 455, Split 1...


Map: 100%|██████████| 455/455 [00:00<00:00, 9257.69 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7589.31 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9216.63 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.366]


Epoch 1 Loss: 32.1015
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:25<00:00,  2.22it/s, loss=0.165]


Epoch 2 Loss: 16.2532
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:25<00:00,  2.20it/s, loss=0.164] 


Epoch 3 Loss: 11.5204
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.22it/s, loss=0.186] 


Epoch 4 Loss: 9.9074
Test Metrics: Precision=0.9272, Recall=0.9272, F1=0.9272

Fine-tuning roberta-large (large) with Train Size 455, Split 2...


Map: 100%|██████████| 455/455 [00:00<00:00, 9297.97 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7817.34 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9434.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.12it/s, loss=0.283]


Epoch 1 Loss: 27.0942
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:26<00:00,  2.13it/s, loss=0.0802]


Epoch 2 Loss: 12.6013
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.12it/s, loss=0.137] 


Epoch 3 Loss: 8.3481
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:27<00:00,  2.08it/s, loss=0.0553]


Epoch 4 Loss: 5.7876
Test Metrics: Precision=0.9378, Recall=0.9378, F1=0.9378

Fine-tuning roberta-large (large) with Train Size 455, Split 3...


Map: 100%|██████████| 455/455 [00:00<00:00, 8608.42 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7678.01 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9341.08 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:27<00:00,  2.08it/s, loss=0.363]


Epoch 1 Loss: 29.8025
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:26<00:00,  2.15it/s, loss=0.318]


Epoch 2 Loss: 16.1629
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.13it/s, loss=0.161] 


Epoch 3 Loss: 10.8085
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:27<00:00,  2.09it/s, loss=0.0829]


Epoch 4 Loss: 8.4282
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 455, Split 4...


Map: 100%|██████████| 455/455 [00:00<00:00, 8923.97 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7726.35 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9344.67 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:27<00:00,  2.09it/s, loss=0.356]


Epoch 1 Loss: 28.4400
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:27<00:00,  2.07it/s, loss=0.249]


Epoch 2 Loss: 13.7233
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:27<00:00,  2.08it/s, loss=0.184] 


Epoch 3 Loss: 9.3856
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:26<00:00,  2.14it/s, loss=0.0612]


Epoch 4 Loss: 6.3968
Test Metrics: Precision=0.9318, Recall=0.9318, F1=0.9318

Fine-tuning roberta-large (large) with Train Size 455, Split 5...


Map: 100%|██████████| 455/455 [00:00<00:00, 9009.83 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 9135.07 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9493.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 57/57 [00:28<00:00,  2.02it/s, loss=0.361]


Epoch 1 Loss: 30.5812
Epoch 2/4


Training Epoch 2: 100%|██████████| 57/57 [00:28<00:00,  2.04it/s, loss=0.356]


Epoch 2 Loss: 19.2760
Epoch 3/4


Training Epoch 3: 100%|██████████| 57/57 [00:28<00:00,  1.97it/s, loss=0.28] 


Epoch 3 Loss: 12.7401
Epoch 4/4


Training Epoch 4: 100%|██████████| 57/57 [00:28<00:00,  2.01it/s, loss=0.108] 


Epoch 4 Loss: 9.5604
Test Metrics: Precision=0.9337, Recall=0.9337, F1=0.9337

Fine-tuning roberta-large (large) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 9095.83 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7202.13 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9504.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 58/58 [00:25<00:00,  2.23it/s, loss=0.176]


Epoch 1 Loss: 27.6980
Epoch 2/4


Training Epoch 2: 100%|██████████| 58/58 [00:26<00:00,  2.21it/s, loss=0.212] 


Epoch 2 Loss: 13.0487
Epoch 3/4


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.22it/s, loss=0.127] 


Epoch 3 Loss: 8.3409
Epoch 4/4


Training Epoch 4: 100%|██████████| 58/58 [00:26<00:00,  2.21it/s, loss=0.154] 


Epoch 4 Loss: 5.9270
Test Metrics: Precision=0.9337, Recall=0.9337, F1=0.9337

Fine-tuning roberta-large (large) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 8988.83 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7324.35 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9478.59 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 58/58 [00:26<00:00,  2.16it/s, loss=0.264]


Epoch 1 Loss: 29.3778
Epoch 2/4


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.14it/s, loss=0.0749]


Epoch 2 Loss: 12.4863
Epoch 3/4


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.21it/s, loss=0.121] 


Epoch 3 Loss: 8.4642
Epoch 4/4


Training Epoch 4: 100%|██████████| 58/58 [00:26<00:00,  2.17it/s, loss=0.115] 


Epoch 4 Loss: 5.7256
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8719.81 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7923.53 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9557.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.14it/s, loss=0.198]


Epoch 1 Loss: 27.2376
Epoch 2/4


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.12it/s, loss=0.125]


Epoch 2 Loss: 13.0997
Epoch 3/4


Training Epoch 3: 100%|██████████| 58/58 [00:27<00:00,  2.09it/s, loss=0.224]


Epoch 3 Loss: 10.5917
Epoch 4/4


Training Epoch 4: 100%|██████████| 58/58 [00:27<00:00,  2.08it/s, loss=0.159] 


Epoch 4 Loss: 7.1610
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 8742.92 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7742.29 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9287.17 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.10it/s, loss=0.275]


Epoch 1 Loss: 29.3719
Epoch 2/4


Training Epoch 2: 100%|██████████| 58/58 [00:26<00:00,  2.15it/s, loss=0.342]


Epoch 2 Loss: 13.5868
Epoch 3/4


Training Epoch 3: 100%|██████████| 58/58 [00:27<00:00,  2.11it/s, loss=0.0376]


Epoch 3 Loss: 9.0034
Epoch 4/4


Training Epoch 4: 100%|██████████| 58/58 [00:27<00:00,  2.14it/s, loss=0.0747]


Epoch 4 Loss: 6.1271
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 8818.17 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7168.68 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9631.33 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.00it/s, loss=0.303]


Epoch 1 Loss: 31.6591
Epoch 2/4


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.05it/s, loss=0.289]


Epoch 2 Loss: 13.6718
Epoch 3/4


Training Epoch 3: 100%|██████████| 58/58 [00:28<00:00,  2.02it/s, loss=0.213] 


Epoch 3 Loss: 9.0556
Epoch 4/4


Training Epoch 4: 100%|██████████| 58/58 [00:28<00:00,  2.02it/s, loss=0.0812]


Epoch 4 Loss: 6.0937
Test Metrics: Precision=0.9390, Recall=0.9390, F1=0.9390

Fine-tuning roberta-large (large) with Train Size 465, Split 1...


Map: 100%|██████████| 465/465 [00:00<00:00, 9044.65 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 8071.81 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9295.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:26<00:00,  2.26it/s, loss=0.343]


Epoch 1 Loss: 28.8064
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:25<00:00,  2.31it/s, loss=0.164]


Epoch 2 Loss: 13.7625
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:26<00:00,  2.22it/s, loss=0.0852]


Epoch 3 Loss: 9.5554
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:26<00:00,  2.25it/s, loss=0.0657]


Epoch 4 Loss: 7.0561
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 465, Split 2...


Map: 100%|██████████| 465/465 [00:00<00:00, 9169.02 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7655.79 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9412.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s, loss=0.155]


Epoch 1 Loss: 27.1652
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.0609]


Epoch 2 Loss: 12.7374
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.16it/s, loss=0.0177]


Epoch 3 Loss: 7.9362
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.17it/s, loss=0.00463]


Epoch 4 Loss: 5.0688
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning roberta-large (large) with Train Size 465, Split 3...


Map: 100%|██████████| 465/465 [00:00<00:00, 8680.54 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7989.64 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9356.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.09it/s, loss=0.255]


Epoch 1 Loss: 29.6627
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:28<00:00,  2.07it/s, loss=0.0878]


Epoch 2 Loss: 14.3224
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s, loss=0.0519]


Epoch 3 Loss: 9.4647
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:28<00:00,  2.05it/s, loss=0.104] 


Epoch 4 Loss: 6.3671
Test Metrics: Precision=0.9352, Recall=0.9352, F1=0.9352

Fine-tuning roberta-large (large) with Train Size 465, Split 4...


Map: 100%|██████████| 465/465 [00:00<00:00, 8597.46 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 8298.84 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9189.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.237]


Epoch 1 Loss: 28.2837
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.16it/s, loss=0.136] 


Epoch 2 Loss: 13.0960
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.11it/s, loss=0.0959]


Epoch 3 Loss: 9.2313
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.16it/s, loss=0.0125]


Epoch 4 Loss: 6.3706
Test Metrics: Precision=0.9354, Recall=0.9354, F1=0.9354

Fine-tuning roberta-large (large) with Train Size 465, Split 5...


Map: 100%|██████████| 465/465 [00:00<00:00, 8603.83 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 6918.96 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9205.26 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.04it/s, loss=0.137]


Epoch 1 Loss: 28.1011
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:28<00:00,  2.05it/s, loss=0.0675]


Epoch 2 Loss: 13.2605
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s, loss=0.102] 


Epoch 3 Loss: 8.9218
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:29<00:00,  2.02it/s, loss=0.186] 


Epoch 4 Loss: 6.6150
Test Metrics: Precision=0.9387, Recall=0.9387, F1=0.9387

Fine-tuning roberta-large (large) with Train Size 470, Split 1...


Map: 100%|██████████| 470/470 [00:00<00:00, 9349.94 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7569.20 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9755.82 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:26<00:00,  2.25it/s, loss=0.376]


Epoch 1 Loss: 29.1797
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s, loss=0.221] 


Epoch 2 Loss: 12.6013
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:26<00:00,  2.19it/s, loss=0.0729]


Epoch 3 Loss: 8.2756
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:26<00:00,  2.23it/s, loss=0.126] 


Epoch 4 Loss: 6.0400
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 470, Split 2...


Map: 100%|██████████| 470/470 [00:00<00:00, 9153.23 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7410.57 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9419.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.203]


Epoch 1 Loss: 27.2514
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.17it/s, loss=0.165] 


Epoch 2 Loss: 12.9936
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:26<00:00,  2.19it/s, loss=0.0942]


Epoch 3 Loss: 8.4757
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.0803]


Epoch 4 Loss: 5.5994
Test Metrics: Precision=0.9367, Recall=0.9367, F1=0.9367

Fine-tuning roberta-large (large) with Train Size 470, Split 3...


Map: 100%|██████████| 470/470 [00:00<00:00, 8526.08 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 8394.32 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9470.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:29<00:00,  2.03it/s, loss=0.158]


Epoch 1 Loss: 26.1741
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.12it/s, loss=0.183]


Epoch 2 Loss: 12.7337
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.11it/s, loss=0.129] 


Epoch 3 Loss: 8.3096
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, loss=0.0624]


Epoch 4 Loss: 5.5725
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 470, Split 4...


Map: 100%|██████████| 470/470 [00:00<00:00, 8769.58 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7670.96 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9239.60 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s, loss=0.261]


Epoch 1 Loss: 28.5497
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.27] 


Epoch 2 Loss: 13.9585
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:26<00:00,  2.19it/s, loss=0.109] 


Epoch 3 Loss: 9.1928
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.16it/s, loss=0.159] 


Epoch 4 Loss: 6.9599
Test Metrics: Precision=0.9303, Recall=0.9303, F1=0.9303

Fine-tuning roberta-large (large) with Train Size 470, Split 5...


Map: 100%|██████████| 470/470 [00:00<00:00, 8739.53 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7476.19 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9479.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.05it/s, loss=0.245]


Epoch 1 Loss: 29.4211
Epoch 2/4


Training Epoch 2: 100%|██████████| 59/59 [00:29<00:00,  1.99it/s, loss=0.18] 


Epoch 2 Loss: 13.1680
Epoch 3/4


Training Epoch 3: 100%|██████████| 59/59 [00:29<00:00,  2.03it/s, loss=0.192] 


Epoch 3 Loss: 8.2563
Epoch 4/4


Training Epoch 4: 100%|██████████| 59/59 [00:29<00:00,  1.99it/s, loss=0.156] 


Epoch 4 Loss: 5.7802
Test Metrics: Precision=0.9397, Recall=0.9397, F1=0.9397

Fine-tuning roberta-large (large) with Train Size 475, Split 1...


Map: 100%|██████████| 475/475 [00:00<00:00, 8873.18 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 6888.39 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9389.67 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:26<00:00,  2.28it/s, loss=0.811]


Epoch 1 Loss: 31.8854
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:26<00:00,  2.28it/s, loss=0.28] 


Epoch 2 Loss: 17.1310
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:26<00:00,  2.24it/s, loss=0.128]


Epoch 3 Loss: 12.1070
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:26<00:00,  2.27it/s, loss=0.0932]


Epoch 4 Loss: 8.9778
Test Metrics: Precision=0.9273, Recall=0.9273, F1=0.9273

Fine-tuning roberta-large (large) with Train Size 475, Split 2...


Map: 100%|██████████| 475/475 [00:00<00:00, 8772.53 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7251.69 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9464.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.16it/s, loss=0.0743]


Epoch 1 Loss: 31.0204
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.13it/s, loss=0.0405]


Epoch 2 Loss: 15.0065
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:27<00:00,  2.18it/s, loss=0.0765]


Epoch 3 Loss: 10.8266
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:27<00:00,  2.22it/s, loss=0.0995]


Epoch 4 Loss: 7.7942
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning roberta-large (large) with Train Size 475, Split 3...


Map: 100%|██████████| 475/475 [00:00<00:00, 8496.43 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7077.93 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9451.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.247]


Epoch 1 Loss: 27.8289
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.09it/s, loss=0.195]


Epoch 2 Loss: 13.0326
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.06it/s, loss=0.102] 


Epoch 3 Loss: 9.4364
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.0417]


Epoch 4 Loss: 6.4956
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 475, Split 4...


Map: 100%|██████████| 475/475 [00:00<00:00, 8618.79 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7196.17 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9661.77 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.308]


Epoch 1 Loss: 29.5412
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.238]


Epoch 2 Loss: 13.2752
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.13it/s, loss=0.108] 


Epoch 3 Loss: 8.7070
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:27<00:00,  2.18it/s, loss=0.0528]


Epoch 4 Loss: 6.0971
Test Metrics: Precision=0.9343, Recall=0.9343, F1=0.9343

Fine-tuning roberta-large (large) with Train Size 475, Split 5...


Map: 100%|██████████| 475/475 [00:00<00:00, 8742.81 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 6778.36 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9569.72 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.383]


Epoch 1 Loss: 28.4650
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.313] 


Epoch 2 Loss: 13.3487
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.13it/s, loss=0.057] 


Epoch 3 Loss: 8.6092
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.125] 


Epoch 4 Loss: 6.0818
Test Metrics: Precision=0.9370, Recall=0.9370, F1=0.9370

Fine-tuning roberta-large (large) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 9042.90 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7029.68 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9279.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:26<00:00,  2.26it/s, loss=0.23] 


Epoch 1 Loss: 29.9595
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.21it/s, loss=0.184]


Epoch 2 Loss: 17.0560
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:26<00:00,  2.23it/s, loss=0.227] 


Epoch 3 Loss: 11.3498
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:26<00:00,  2.25it/s, loss=0.108] 


Epoch 4 Loss: 9.0702
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 9023.20 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7716.77 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9022.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.14it/s, loss=0.31] 


Epoch 1 Loss: 27.1911
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.327] 


Epoch 2 Loss: 14.5385
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.289] 


Epoch 3 Loss: 10.4241
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.14it/s, loss=0.131] 


Epoch 4 Loss: 7.8396
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8694.21 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7281.38 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9640.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.12it/s, loss=0.27] 


Epoch 1 Loss: 27.8414
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.324] 


Epoch 2 Loss: 13.4589
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.0905]


Epoch 3 Loss: 8.7004
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.09it/s, loss=0.139] 


Epoch 4 Loss: 5.9690
Test Metrics: Precision=0.9366, Recall=0.9366, F1=0.9366

Fine-tuning roberta-large (large) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 8832.67 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7045.18 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9289.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.08it/s, loss=0.244]


Epoch 1 Loss: 27.5062
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.17it/s, loss=0.188]


Epoch 2 Loss: 12.7462
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.09it/s, loss=0.199] 


Epoch 3 Loss: 8.3151
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.13it/s, loss=0.084] 


Epoch 4 Loss: 6.0868
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8844.97 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8004.56 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9236.62 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.00it/s, loss=0.257]


Epoch 1 Loss: 33.1819
Epoch 2/4


Training Epoch 2: 100%|██████████| 60/60 [00:30<00:00,  1.99it/s, loss=0.206]


Epoch 2 Loss: 16.7717
Epoch 3/4


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.07it/s, loss=0.155]


Epoch 3 Loss: 12.8600
Epoch 4/4


Training Epoch 4: 100%|██████████| 60/60 [00:29<00:00,  2.06it/s, loss=0.259] 


Epoch 4 Loss: 10.0555
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 485, Split 1...


Map: 100%|██████████| 485/485 [00:00<00:00, 9051.88 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7704.13 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9269.43 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 61/61 [00:27<00:00,  2.24it/s, loss=0.513]


Epoch 1 Loss: 30.9220
Epoch 2/4


Training Epoch 2: 100%|██████████| 61/61 [00:27<00:00,  2.21it/s, loss=0.284]


Epoch 2 Loss: 16.0988
Epoch 3/4


Training Epoch 3: 100%|██████████| 61/61 [00:27<00:00,  2.19it/s, loss=0.198] 


Epoch 3 Loss: 11.9813
Epoch 4/4


Training Epoch 4: 100%|██████████| 61/61 [00:26<00:00,  2.31it/s, loss=0.167] 


Epoch 4 Loss: 9.1461
Test Metrics: Precision=0.9305, Recall=0.9305, F1=0.9305

Fine-tuning roberta-large (large) with Train Size 485, Split 2...


Map: 100%|██████████| 485/485 [00:00<00:00, 9294.57 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7479.09 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9220.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 61/61 [00:28<00:00,  2.14it/s, loss=0.221]


Epoch 1 Loss: 31.0264
Epoch 2/4


Training Epoch 2: 100%|██████████| 61/61 [00:27<00:00,  2.19it/s, loss=0.234]


Epoch 2 Loss: 14.1466
Epoch 3/4


Training Epoch 3: 100%|██████████| 61/61 [00:28<00:00,  2.12it/s, loss=0.164] 


Epoch 3 Loss: 10.5001
Epoch 4/4


Training Epoch 4: 100%|██████████| 61/61 [00:27<00:00,  2.23it/s, loss=0.154] 


Epoch 4 Loss: 7.5763
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 485, Split 3...


Map: 100%|██████████| 485/485 [00:00<00:00, 8488.05 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 8728.01 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9655.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 61/61 [00:29<00:00,  2.07it/s, loss=0.192]


Epoch 1 Loss: 26.5770
Epoch 2/4


Training Epoch 2: 100%|██████████| 61/61 [00:29<00:00,  2.04it/s, loss=0.397] 


Epoch 2 Loss: 13.8501
Epoch 3/4


Training Epoch 3: 100%|██████████| 61/61 [00:29<00:00,  2.10it/s, loss=0.221] 


Epoch 3 Loss: 8.5951
Epoch 4/4


Training Epoch 4: 100%|██████████| 61/61 [00:29<00:00,  2.06it/s, loss=0.0889]


Epoch 4 Loss: 6.1206
Test Metrics: Precision=0.9367, Recall=0.9367, F1=0.9367

Fine-tuning roberta-large (large) with Train Size 485, Split 4...


Map: 100%|██████████| 485/485 [00:00<00:00, 8844.20 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7360.03 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9409.78 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 61/61 [00:28<00:00,  2.12it/s, loss=0.288]


Epoch 1 Loss: 28.3633
Epoch 2/4


Training Epoch 2: 100%|██████████| 61/61 [00:28<00:00,  2.14it/s, loss=0.212] 


Epoch 2 Loss: 12.6283
Epoch 3/4


Training Epoch 3: 100%|██████████| 61/61 [00:28<00:00,  2.14it/s, loss=0.155] 


Epoch 3 Loss: 8.5982
Epoch 4/4


Training Epoch 4: 100%|██████████| 61/61 [00:28<00:00,  2.12it/s, loss=0.0906]


Epoch 4 Loss: 6.0170
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 485, Split 5...


Map: 100%|██████████| 485/485 [00:00<00:00, 8700.28 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7309.38 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9348.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 61/61 [00:29<00:00,  2.08it/s, loss=0.582]


Epoch 1 Loss: 32.1729
Epoch 2/4


Training Epoch 2: 100%|██████████| 61/61 [00:28<00:00,  2.11it/s, loss=0.316]


Epoch 2 Loss: 16.6257
Epoch 3/4


Training Epoch 3: 100%|██████████| 61/61 [00:29<00:00,  2.09it/s, loss=0.133] 


Epoch 3 Loss: 11.6456
Epoch 4/4


Training Epoch 4: 100%|██████████| 61/61 [00:29<00:00,  2.10it/s, loss=0.179] 


Epoch 4 Loss: 8.7257
Test Metrics: Precision=0.9363, Recall=0.9363, F1=0.9363

Fine-tuning roberta-large (large) with Train Size 490, Split 1...


Map: 100%|██████████| 490/490 [00:00<00:00, 8986.76 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7572.34 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9565.12 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:27<00:00,  2.24it/s, loss=0.22] 


Epoch 1 Loss: 29.4700
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:27<00:00,  2.24it/s, loss=0.243]


Epoch 2 Loss: 13.6687
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:27<00:00,  2.28it/s, loss=0.135] 


Epoch 3 Loss: 9.8543
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:27<00:00,  2.27it/s, loss=0.0316]


Epoch 4 Loss: 6.5252
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 490, Split 2...


Map: 100%|██████████| 490/490 [00:00<00:00, 9220.94 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7614.71 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9001.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.16it/s, loss=0.497]


Epoch 1 Loss: 30.8881
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:28<00:00,  2.17it/s, loss=0.311]


Epoch 2 Loss: 15.4914
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.0881]


Epoch 3 Loss: 9.9979
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:27<00:00,  2.22it/s, loss=0.225] 


Epoch 4 Loss: 7.1194
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 490, Split 3...


Map: 100%|██████████| 490/490 [00:00<00:00, 8641.29 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8302.70 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9080.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.336]


Epoch 1 Loss: 30.3996
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.138] 


Epoch 2 Loss: 13.2683
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.133] 


Epoch 3 Loss: 8.7368
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:29<00:00,  2.11it/s, loss=0.196] 


Epoch 4 Loss: 6.0232
Test Metrics: Precision=0.9365, Recall=0.9365, F1=0.9365

Fine-tuning roberta-large (large) with Train Size 490, Split 4...


Map: 100%|██████████| 490/490 [00:00<00:00, 8942.07 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8306.22 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9678.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.14it/s, loss=0.879]


Epoch 1 Loss: 31.0513
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.12it/s, loss=0.103] 


Epoch 2 Loss: 15.9625
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:28<00:00,  2.14it/s, loss=0.124] 


Epoch 3 Loss: 10.5266
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:28<00:00,  2.16it/s, loss=0.0308]


Epoch 4 Loss: 7.7110
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 490, Split 5...


Map: 100%|██████████| 490/490 [00:00<00:00, 8879.29 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8012.98 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9420.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:29<00:00,  2.11it/s, loss=0.425]


Epoch 1 Loss: 27.9901
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:30<00:00,  2.03it/s, loss=0.111] 


Epoch 2 Loss: 13.9113
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.64]  


Epoch 3 Loss: 9.9020
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:30<00:00,  2.05it/s, loss=0.109] 


Epoch 4 Loss: 7.1799
Test Metrics: Precision=0.9388, Recall=0.9388, F1=0.9388

Fine-tuning roberta-large (large) with Train Size 495, Split 1...


Map: 100%|██████████| 495/495 [00:00<00:00, 9167.90 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7715.56 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 8965.62 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:27<00:00,  2.24it/s, loss=0.264]


Epoch 1 Loss: 29.7847
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:26<00:00,  2.30it/s, loss=0.212] 


Epoch 2 Loss: 13.5569
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:27<00:00,  2.24it/s, loss=0.126] 


Epoch 3 Loss: 9.6253
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:27<00:00,  2.22it/s, loss=0.0919]


Epoch 4 Loss: 6.0865
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 495, Split 2...


Map: 100%|██████████| 495/495 [00:00<00:00, 9313.86 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 8077.58 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9012.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:29<00:00,  2.11it/s, loss=0.405]


Epoch 1 Loss: 30.1485
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.11it/s, loss=0.231]


Epoch 2 Loss: 16.0710
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:28<00:00,  2.16it/s, loss=0.217] 


Epoch 3 Loss: 11.5287
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:28<00:00,  2.15it/s, loss=0.109] 


Epoch 4 Loss: 8.2710
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 495, Split 3...


Map: 100%|██████████| 495/495 [00:00<00:00, 8772.06 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7858.07 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9658.20 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:30<00:00,  2.04it/s, loss=0.21] 


Epoch 1 Loss: 29.6555
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.07it/s, loss=0.13]  


Epoch 2 Loss: 14.3169
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.08it/s, loss=0.133] 


Epoch 3 Loss: 8.8697
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:30<00:00,  2.06it/s, loss=0.0993]


Epoch 4 Loss: 6.1419
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 495, Split 4...


Map: 100%|██████████| 495/495 [00:00<00:00, 8773.88 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7973.96 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9291.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.29] 


Epoch 1 Loss: 29.6965
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.12it/s, loss=0.248] 


Epoch 2 Loss: 14.3764
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:28<00:00,  2.18it/s, loss=0.0985]


Epoch 3 Loss: 9.7349
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:28<00:00,  2.14it/s, loss=0.0849]


Epoch 4 Loss: 7.0286
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 495, Split 5...


Map: 100%|██████████| 495/495 [00:00<00:00, 8867.22 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7564.46 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9477.90 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 62/62 [00:31<00:00,  1.98it/s, loss=0.254]


Epoch 1 Loss: 30.0850
Epoch 2/4


Training Epoch 2: 100%|██████████| 62/62 [00:31<00:00,  1.97it/s, loss=0.24]  


Epoch 2 Loss: 14.6247
Epoch 3/4


Training Epoch 3: 100%|██████████| 62/62 [00:30<00:00,  2.03it/s, loss=0.0979]


Epoch 3 Loss: 9.8165
Epoch 4/4


Training Epoch 4: 100%|██████████| 62/62 [00:29<00:00,  2.07it/s, loss=0.0821]


Epoch 4 Loss: 6.6731
Test Metrics: Precision=0.9392, Recall=0.9392, F1=0.9392

Fine-tuning roberta-large (large) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 9000.69 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7631.56 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 3407.34 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 63/63 [00:27<00:00,  2.27it/s, loss=0.265]


Epoch 1 Loss: 29.4240
Epoch 2/4


Training Epoch 2: 100%|██████████| 63/63 [00:27<00:00,  2.26it/s, loss=0.198]


Epoch 2 Loss: 13.1369
Epoch 3/4


Training Epoch 3: 100%|██████████| 63/63 [00:28<00:00,  2.23it/s, loss=0.144] 


Epoch 3 Loss: 8.9358
Epoch 4/4


Training Epoch 4: 100%|██████████| 63/63 [00:27<00:00,  2.31it/s, loss=0.0825]


Epoch 4 Loss: 6.3657
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 8887.14 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7756.46 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9022.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 63/63 [00:28<00:00,  2.20it/s, loss=0.378]


Epoch 1 Loss: 30.0617
Epoch 2/4


Training Epoch 2: 100%|██████████| 63/63 [00:28<00:00,  2.22it/s, loss=0.185]


Epoch 2 Loss: 14.1360
Epoch 3/4


Training Epoch 3: 100%|██████████| 63/63 [00:28<00:00,  2.18it/s, loss=0.0442]


Epoch 3 Loss: 9.4000
Epoch 4/4


Training Epoch 4: 100%|██████████| 63/63 [00:28<00:00,  2.23it/s, loss=0.0716]


Epoch 4 Loss: 6.4198
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 8697.54 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8337.25 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9606.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.06it/s, loss=0.175]


Epoch 1 Loss: 30.4008
Epoch 2/4


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.208]


Epoch 2 Loss: 14.4994
Epoch 3/4


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.06it/s, loss=0.207] 


Epoch 3 Loss: 10.1451
Epoch 4/4


Training Epoch 4: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.0622]


Epoch 4 Loss: 7.4835
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 8909.45 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7265.76 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9401.24 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 63/63 [00:28<00:00,  2.19it/s, loss=0.184]


Epoch 1 Loss: 28.9188
Epoch 2/4


Training Epoch 2: 100%|██████████| 63/63 [00:29<00:00,  2.16it/s, loss=0.367]


Epoch 2 Loss: 13.8515
Epoch 3/4


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.15it/s, loss=0.476] 


Epoch 3 Loss: 11.5943
Epoch 4/4


Training Epoch 4: 100%|██████████| 63/63 [00:28<00:00,  2.17it/s, loss=0.0593]


Epoch 4 Loss: 7.8765
Test Metrics: Precision=0.9336, Recall=0.9336, F1=0.9336

Fine-tuning roberta-large (large) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 8821.46 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8667.71 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9040.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/4


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.295]


Epoch 1 Loss: 30.1973
Epoch 2/4


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.06it/s, loss=0.217] 


Epoch 2 Loss: 15.6231
Epoch 3/4


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.12it/s, loss=0.196] 


Epoch 3 Loss: 10.5744
Epoch 4/4


Training Epoch 4: 100%|██████████| 63/63 [00:31<00:00,  2.01it/s, loss=0.0924]


Epoch 4 Loss: 7.6449
Test Metrics: Precision=0.9372, Recall=0.9372, F1=0.9372
Results saved to Experiments_epoch4_roberta.xlsx


In [14]:
# Five-epoch run of the train-size sweep, restricted to roberta-large.
# The size arguments (210 to 500 in steps of 5) are forwarded unchanged to the helper;
# file_name is presumably the spreadsheet the helper writes its results to -- confirm
# against iterate_and_finetune_with_torch.
models = dict(large="roberta-large")

iterate_and_finetune_with_torch(
    dataset=dataset,
    file_name='Experiments_epoch5_roberta.xlsx',
    models=models,
    start_size=210,
    end_size=500,
    step_size=5,
    num_epochs=5,
)


Fine-tuning roberta-large (large) with Train Size 210, Split 1...


Map: 100%|██████████| 210/210 [00:00<00:00, 7713.02 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6234.23 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9574.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:11<00:00,  2.35it/s, loss=0.356]


Epoch 1 Loss: 17.5578
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:11<00:00,  2.34it/s, loss=0.255]


Epoch 2 Loss: 7.8953
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:11<00:00,  2.29it/s, loss=0.0559]


Epoch 3 Loss: 5.4315
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:11<00:00,  2.30it/s, loss=0.173] 


Epoch 4 Loss: 4.0549
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:12<00:00,  2.22it/s, loss=0.0975]


Epoch 5 Loss: 3.2279
Test Metrics: Precision=0.9260, Recall=0.9260, F1=0.9260

Fine-tuning roberta-large (large) with Train Size 210, Split 2...


Map: 100%|██████████| 210/210 [00:00<00:00, 8324.71 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6689.48 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9636.87 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:12<00:00,  2.21it/s, loss=0.691]


Epoch 1 Loss: 19.0394
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:12<00:00,  2.15it/s, loss=0.452]


Epoch 2 Loss: 9.1653
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:11<00:00,  2.33it/s, loss=0.0417]


Epoch 3 Loss: 6.0342
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:11<00:00,  2.32it/s, loss=0.0639]


Epoch 4 Loss: 3.9956
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:10<00:00,  2.46it/s, loss=0.184] 


Epoch 5 Loss: 3.1816
Test Metrics: Precision=0.9272, Recall=0.9272, F1=0.9272

Fine-tuning roberta-large (large) with Train Size 210, Split 3...


Map: 100%|██████████| 210/210 [00:00<00:00, 8533.02 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6102.29 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9614.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:11<00:00,  2.34it/s, loss=0.391]


Epoch 1 Loss: 17.1447
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:11<00:00,  2.26it/s, loss=0.458]


Epoch 2 Loss: 8.0076
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:11<00:00,  2.32it/s, loss=0.33] 


Epoch 3 Loss: 5.6543
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:11<00:00,  2.29it/s, loss=0.101] 


Epoch 4 Loss: 3.5945
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:11<00:00,  2.33it/s, loss=0.129] 


Epoch 5 Loss: 2.6827
Test Metrics: Precision=0.9286, Recall=0.9286, F1=0.9286

Fine-tuning roberta-large (large) with Train Size 210, Split 4...


Map: 100%|██████████| 210/210 [00:00<00:00, 8605.72 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 5224.69 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9421.45 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:10<00:00,  2.47it/s, loss=0.536]


Epoch 1 Loss: 17.8096
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:10<00:00,  2.49it/s, loss=0.156]


Epoch 2 Loss: 8.7420
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:10<00:00,  2.46it/s, loss=0.199] 


Epoch 3 Loss: 6.1734
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:11<00:00,  2.38it/s, loss=0.0161]


Epoch 4 Loss: 4.3721
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:11<00:00,  2.34it/s, loss=0.0976]


Epoch 5 Loss: 3.4677
Test Metrics: Precision=0.9266, Recall=0.9266, F1=0.9266

Fine-tuning roberta-large (large) with Train Size 210, Split 5...


Map: 100%|██████████| 210/210 [00:00<00:00, 8590.53 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6016.21 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9837.37 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:13<00:00,  1.94it/s, loss=0.246]


Epoch 1 Loss: 18.9874
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:14<00:00,  1.92it/s, loss=1.4]  


Epoch 2 Loss: 10.4822
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:14<00:00,  1.91it/s, loss=0.366]


Epoch 3 Loss: 6.8966
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:14<00:00,  1.85it/s, loss=0.0721]


Epoch 4 Loss: 5.1915
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:13<00:00,  2.06it/s, loss=0.124] 


Epoch 5 Loss: 4.4290
Test Metrics: Precision=0.9248, Recall=0.9248, F1=0.9248

Fine-tuning roberta-large (large) with Train Size 215, Split 1...


Map: 100%|██████████| 215/215 [00:00<00:00, 8430.80 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 6080.75 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9501.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:12<00:00,  2.25it/s, loss=0.579]


Epoch 1 Loss: 16.5754
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:11<00:00,  2.32it/s, loss=0.363]


Epoch 2 Loss: 7.5765
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:11<00:00,  2.28it/s, loss=0.16]  


Epoch 3 Loss: 4.8402
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:12<00:00,  2.19it/s, loss=0.0794]


Epoch 4 Loss: 3.3644
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:12<00:00,  2.23it/s, loss=0.0991]


Epoch 5 Loss: 2.5844
Test Metrics: Precision=0.9285, Recall=0.9285, F1=0.9285

Fine-tuning roberta-large (large) with Train Size 215, Split 2...


Map: 100%|██████████| 215/215 [00:00<00:00, 8860.92 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 5865.40 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9679.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:12<00:00,  2.21it/s, loss=0.395]


Epoch 1 Loss: 18.4168
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:12<00:00,  2.19it/s, loss=0.272]


Epoch 2 Loss: 8.8689
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:11<00:00,  2.33it/s, loss=0.117]


Epoch 3 Loss: 5.8602
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:12<00:00,  2.10it/s, loss=0.151] 


Epoch 4 Loss: 3.9754
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:12<00:00,  2.23it/s, loss=0.063] 


Epoch 5 Loss: 3.0744
Test Metrics: Precision=0.9243, Recall=0.9243, F1=0.9243

Fine-tuning roberta-large (large) with Train Size 215, Split 3...


Map: 100%|██████████| 215/215 [00:00<00:00, 8200.79 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 5722.11 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9436.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:13<00:00,  2.03it/s, loss=0.271]


Epoch 1 Loss: 18.4871
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:12<00:00,  2.11it/s, loss=0.448]


Epoch 2 Loss: 8.1738
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:13<00:00,  2.06it/s, loss=0.14] 


Epoch 3 Loss: 5.7201
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:13<00:00,  2.06it/s, loss=0.116] 


Epoch 4 Loss: 3.9588
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:13<00:00,  2.03it/s, loss=0.135] 


Epoch 5 Loss: 3.0854
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 215, Split 4...


Map: 100%|██████████| 215/215 [00:00<00:00, 8467.53 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 5529.82 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9459.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:12<00:00,  2.17it/s, loss=0.387]


Epoch 1 Loss: 18.4364
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:12<00:00,  2.19it/s, loss=0.2]  


Epoch 2 Loss: 8.7330
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:12<00:00,  2.17it/s, loss=0.172]


Epoch 3 Loss: 6.3836
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:12<00:00,  2.22it/s, loss=0.197]


Epoch 4 Loss: 5.1570
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:11<00:00,  2.27it/s, loss=0.111]


Epoch 5 Loss: 3.9296
Test Metrics: Precision=0.9235, Recall=0.9235, F1=0.9235

Fine-tuning roberta-large (large) with Train Size 215, Split 5...


Map: 100%|██████████| 215/215 [00:00<00:00, 8422.93 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 5986.10 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9553.96 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s, loss=0.557]


Epoch 1 Loss: 20.7233
Epoch 2/5


Training Epoch 2: 100%|██████████| 27/27 [00:13<00:00,  1.95it/s, loss=0.432]


Epoch 2 Loss: 10.3666
Epoch 3/5


Training Epoch 3: 100%|██████████| 27/27 [00:13<00:00,  2.01it/s, loss=0.294]


Epoch 3 Loss: 7.1785
Epoch 4/5


Training Epoch 4: 100%|██████████| 27/27 [00:14<00:00,  1.87it/s, loss=0.187]


Epoch 4 Loss: 6.2361
Epoch 5/5


Training Epoch 5: 100%|██████████| 27/27 [00:13<00:00,  1.99it/s, loss=0.114] 


Epoch 5 Loss: 4.4643
Test Metrics: Precision=0.9243, Recall=0.9243, F1=0.9243

Fine-tuning roberta-large (large) with Train Size 220, Split 1...


Map: 100%|██████████| 220/220 [00:00<00:00, 8347.63 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 5154.29 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9312.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 28/28 [00:11<00:00,  2.38it/s, loss=0.299]


Epoch 1 Loss: 19.9334
Epoch 2/5


Training Epoch 2: 100%|██████████| 28/28 [00:12<00:00,  2.20it/s, loss=0.28] 


Epoch 2 Loss: 8.4079
Epoch 3/5


Training Epoch 3: 100%|██████████| 28/28 [00:12<00:00,  2.32it/s, loss=0.242] 


Epoch 3 Loss: 5.6585
Epoch 4/5


Training Epoch 4: 100%|██████████| 28/28 [00:11<00:00,  2.36it/s, loss=0.102] 


Epoch 4 Loss: 4.1387
Epoch 5/5


Training Epoch 5: 100%|██████████| 28/28 [00:12<00:00,  2.28it/s, loss=0.0739]


Epoch 5 Loss: 3.0703
Test Metrics: Precision=0.9265, Recall=0.9265, F1=0.9265

Fine-tuning roberta-large (large) with Train Size 220, Split 2...


Map: 100%|██████████| 220/220 [00:00<00:00, 8350.57 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6894.15 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9368.96 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 28/28 [00:12<00:00,  2.25it/s, loss=0.205]


Epoch 1 Loss: 17.5868
Epoch 2/5


Training Epoch 2: 100%|██████████| 28/28 [00:12<00:00,  2.31it/s, loss=0.298]


Epoch 2 Loss: 8.7428
Epoch 3/5


Training Epoch 3: 100%|██████████| 28/28 [00:11<00:00,  2.37it/s, loss=0.225] 


Epoch 3 Loss: 5.6603
Epoch 4/5


Training Epoch 4: 100%|██████████| 28/28 [00:13<00:00,  2.11it/s, loss=0.141] 


Epoch 4 Loss: 3.9163
Epoch 5/5


Training Epoch 5: 100%|██████████| 28/28 [00:12<00:00,  2.21it/s, loss=0.0735]


Epoch 5 Loss: 2.8185
Test Metrics: Precision=0.9279, Recall=0.9279, F1=0.9279

Fine-tuning roberta-large (large) with Train Size 220, Split 3...


Map: 100%|██████████| 220/220 [00:00<00:00, 8667.55 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 5464.57 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9823.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 28/28 [00:13<00:00,  2.13it/s, loss=0.254]


Epoch 1 Loss: 18.1874
Epoch 2/5


Training Epoch 2: 100%|██████████| 28/28 [00:12<00:00,  2.21it/s, loss=0.577]


Epoch 2 Loss: 8.2300
Epoch 3/5


Training Epoch 3: 100%|██████████| 28/28 [00:12<00:00,  2.18it/s, loss=0.272] 


Epoch 3 Loss: 5.0817
Epoch 4/5


Training Epoch 4: 100%|██████████| 28/28 [00:13<00:00,  2.13it/s, loss=0.139] 


Epoch 4 Loss: 3.4534
Epoch 5/5


Training Epoch 5: 100%|██████████| 28/28 [00:12<00:00,  2.17it/s, loss=0.0455]


Epoch 5 Loss: 2.5801
Test Metrics: Precision=0.9287, Recall=0.9287, F1=0.9287

Fine-tuning roberta-large (large) with Train Size 220, Split 4...


Map: 100%|██████████| 220/220 [00:00<00:00, 8357.08 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6773.95 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9421.31 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 28/28 [00:12<00:00,  2.25it/s, loss=0.318]


Epoch 1 Loss: 17.7594
Epoch 2/5


Training Epoch 2: 100%|██████████| 28/28 [00:12<00:00,  2.19it/s, loss=0.229]


Epoch 2 Loss: 8.9065
Epoch 3/5


Training Epoch 3: 100%|██████████| 28/28 [00:12<00:00,  2.29it/s, loss=0.223] 


Epoch 3 Loss: 6.1017
Epoch 4/5


Training Epoch 4: 100%|██████████| 28/28 [00:12<00:00,  2.20it/s, loss=0.0691]


Epoch 4 Loss: 4.4459
Epoch 5/5


Training Epoch 5: 100%|██████████| 28/28 [00:13<00:00,  2.10it/s, loss=0.071] 


Epoch 5 Loss: 3.6755
Test Metrics: Precision=0.9255, Recall=0.9255, F1=0.9255

Fine-tuning roberta-large (large) with Train Size 220, Split 5...


Map: 100%|██████████| 220/220 [00:00<00:00, 8460.37 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6349.54 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9700.73 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 28/28 [00:14<00:00,  1.96it/s, loss=0.387]


Epoch 1 Loss: 19.0134
Epoch 2/5


Training Epoch 2: 100%|██████████| 28/28 [00:13<00:00,  2.07it/s, loss=0.0494]


Epoch 2 Loss: 8.1305
Epoch 3/5


Training Epoch 3: 100%|██████████| 28/28 [00:14<00:00,  1.94it/s, loss=0.125]


Epoch 3 Loss: 5.7263
Epoch 4/5


Training Epoch 4: 100%|██████████| 28/28 [00:14<00:00,  1.98it/s, loss=0.318] 


Epoch 4 Loss: 4.1233
Epoch 5/5


Training Epoch 5: 100%|██████████| 28/28 [00:14<00:00,  1.97it/s, loss=0.107] 


Epoch 5 Loss: 3.1694
Test Metrics: Precision=0.9304, Recall=0.9304, F1=0.9304

Fine-tuning roberta-large (large) with Train Size 225, Split 1...


Map: 100%|██████████| 225/225 [00:00<00:00, 8596.45 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 6338.36 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9415.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:12<00:00,  2.26it/s, loss=1.05] 


Epoch 1 Loss: 18.3272
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:12<00:00,  2.31it/s, loss=0.116]


Epoch 2 Loss: 8.2728
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:12<00:00,  2.32it/s, loss=0.2]  


Epoch 3 Loss: 5.4795
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:12<00:00,  2.27it/s, loss=0.17]  


Epoch 4 Loss: 4.0713
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.21it/s, loss=0.00621]


Epoch 5 Loss: 2.8062
Test Metrics: Precision=0.9288, Recall=0.9288, F1=0.9288

Fine-tuning roberta-large (large) with Train Size 225, Split 2...


Map: 100%|██████████| 225/225 [00:00<00:00, 8513.86 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 5652.19 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9553.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:13<00:00,  2.14it/s, loss=0.591]


Epoch 1 Loss: 15.8248
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.17it/s, loss=0.635]


Epoch 2 Loss: 8.6327
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:12<00:00,  2.28it/s, loss=0.655]


Epoch 3 Loss: 6.7986
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.19it/s, loss=0.295] 


Epoch 4 Loss: 4.7849
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.21it/s, loss=0.274] 


Epoch 5 Loss: 3.8631
Test Metrics: Precision=0.9242, Recall=0.9242, F1=0.9242

Fine-tuning roberta-large (large) with Train Size 225, Split 3...


Map: 100%|██████████| 225/225 [00:00<00:00, 8485.38 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 6718.05 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9370.43 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:12<00:00,  2.25it/s, loss=0.379]


Epoch 1 Loss: 16.9095
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.10it/s, loss=0.247]


Epoch 2 Loss: 13.0172
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:13<00:00,  2.14it/s, loss=0.32] 


Epoch 3 Loss: 11.8554
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.18it/s, loss=0.19] 


Epoch 4 Loss: 10.2048
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:12<00:00,  2.24it/s, loss=0.399]


Epoch 5 Loss: 9.4754
Test Metrics: Precision=0.8790, Recall=0.8790, F1=0.8790

Fine-tuning roberta-large (large) with Train Size 225, Split 4...


Map: 100%|██████████| 225/225 [00:00<00:00, 8500.13 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 5641.89 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9232.09 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:12<00:00,  2.26it/s, loss=0.264]


Epoch 1 Loss: 18.4271
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.23it/s, loss=0.167]


Epoch 2 Loss: 10.2790
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:13<00:00,  2.16it/s, loss=0.1]  


Epoch 3 Loss: 6.3116
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.18it/s, loss=0.312] 


Epoch 4 Loss: 4.8269
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.23it/s, loss=0.0122]


Epoch 5 Loss: 3.5039
Test Metrics: Precision=0.9246, Recall=0.9246, F1=0.9246

Fine-tuning roberta-large (large) with Train Size 225, Split 5...


Map: 100%|██████████| 225/225 [00:00<00:00, 8359.18 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 5474.00 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9653.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:14<00:00,  2.04it/s, loss=0.325]


Epoch 1 Loss: 18.5336
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:14<00:00,  2.02it/s, loss=0.415]


Epoch 2 Loss: 9.0705
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:14<00:00,  2.05it/s, loss=0.119]


Epoch 3 Loss: 6.2552
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:14<00:00,  2.01it/s, loss=0.0364]


Epoch 4 Loss: 4.4464
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.08it/s, loss=0.0833]


Epoch 5 Loss: 3.3738
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning roberta-large (large) with Train Size 230, Split 1...


Map: 100%|██████████| 230/230 [00:00<00:00, 8435.92 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 5809.64 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9450.35 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:12<00:00,  2.25it/s, loss=0.506]


Epoch 1 Loss: 18.5089
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:12<00:00,  2.26it/s, loss=0.179]


Epoch 2 Loss: 7.8045
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:13<00:00,  2.22it/s, loss=0.149] 


Epoch 3 Loss: 5.1463
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.22it/s, loss=0.171] 


Epoch 4 Loss: 3.5019
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:12<00:00,  2.26it/s, loss=0.103] 


Epoch 5 Loss: 2.4330
Test Metrics: Precision=0.9280, Recall=0.9280, F1=0.9280

Fine-tuning roberta-large (large) with Train Size 230, Split 2...


Map: 100%|██████████| 230/230 [00:00<00:00, 8425.46 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 4488.60 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9630.55 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:13<00:00,  2.22it/s, loss=0.415]


Epoch 1 Loss: 19.2018
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.15it/s, loss=0.368]


Epoch 2 Loss: 8.7137
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:13<00:00,  2.19it/s, loss=0.242]


Epoch 3 Loss: 5.8320
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.08it/s, loss=0.173] 


Epoch 4 Loss: 3.8096
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.12it/s, loss=0.0478]


Epoch 5 Loss: 2.8524
Test Metrics: Precision=0.9301, Recall=0.9301, F1=0.9301

Fine-tuning roberta-large (large) with Train Size 230, Split 3...


Map: 100%|██████████| 230/230 [00:00<00:00, 8485.49 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 5949.18 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9390.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:13<00:00,  2.10it/s, loss=0.395]


Epoch 1 Loss: 18.8949
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.08it/s, loss=0.481]


Epoch 2 Loss: 8.6839
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:13<00:00,  2.18it/s, loss=0.222]


Epoch 3 Loss: 5.9910
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.13it/s, loss=0.209]


Epoch 4 Loss: 5.1305
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.08it/s, loss=0.153] 


Epoch 5 Loss: 3.9694
Test Metrics: Precision=0.9244, Recall=0.9244, F1=0.9244

Fine-tuning roberta-large (large) with Train Size 230, Split 4...


Map: 100%|██████████| 230/230 [00:00<00:00, 8451.15 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 5788.54 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 4384.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:13<00:00,  2.18it/s, loss=0.315]


Epoch 1 Loss: 17.1479
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:13<00:00,  2.10it/s, loss=0.307]


Epoch 2 Loss: 8.2908
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:14<00:00,  2.06it/s, loss=0.304] 


Epoch 3 Loss: 5.6306
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:13<00:00,  2.13it/s, loss=0.125] 


Epoch 4 Loss: 3.8956
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:13<00:00,  2.11it/s, loss=0.0599]


Epoch 5 Loss: 2.9105
Test Metrics: Precision=0.9263, Recall=0.9263, F1=0.9263

Fine-tuning roberta-large (large) with Train Size 230, Split 5...


Map: 100%|██████████| 230/230 [00:00<00:00, 8295.55 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6904.95 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9123.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 29/29 [00:14<00:00,  2.03it/s, loss=0.382]


Epoch 1 Loss: 20.4369
Epoch 2/5


Training Epoch 2: 100%|██████████| 29/29 [00:14<00:00,  1.99it/s, loss=0.23] 


Epoch 2 Loss: 10.1030
Epoch 3/5


Training Epoch 3: 100%|██████████| 29/29 [00:14<00:00,  2.06it/s, loss=0.142]


Epoch 3 Loss: 7.1008
Epoch 4/5


Training Epoch 4: 100%|██████████| 29/29 [00:14<00:00,  2.07it/s, loss=0.112] 


Epoch 4 Loss: 5.2712
Epoch 5/5


Training Epoch 5: 100%|██████████| 29/29 [00:14<00:00,  2.03it/s, loss=0.17]  


Epoch 5 Loss: 4.2715
Test Metrics: Precision=0.9273, Recall=0.9273, F1=0.9273

Fine-tuning roberta-large (large) with Train Size 235, Split 1...


Map: 100%|██████████| 235/235 [00:00<00:00, 8451.26 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 5939.33 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9232.64 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.27it/s, loss=0.193]


Epoch 1 Loss: 17.2821
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:12<00:00,  2.32it/s, loss=0.217]


Epoch 2 Loss: 8.3273
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.26it/s, loss=0.0743]


Epoch 3 Loss: 5.7092
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.21it/s, loss=0.171] 


Epoch 4 Loss: 4.0650
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:12<00:00,  2.40it/s, loss=0.0935]


Epoch 5 Loss: 3.0635
Test Metrics: Precision=0.9290, Recall=0.9290, F1=0.9290

Fine-tuning roberta-large (large) with Train Size 235, Split 2...


Map: 100%|██████████| 235/235 [00:00<00:00, 8742.55 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6189.20 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9617.63 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.19it/s, loss=0.446]


Epoch 1 Loss: 23.4111
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:13<00:00,  2.25it/s, loss=0.276]


Epoch 2 Loss: 12.0742
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.21it/s, loss=0.249]


Epoch 3 Loss: 8.0711
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.21it/s, loss=0.19] 


Epoch 4 Loss: 6.1818
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:13<00:00,  2.18it/s, loss=0.0633]


Epoch 5 Loss: 4.6521
Test Metrics: Precision=0.9220, Recall=0.9220, F1=0.9220

Fine-tuning roberta-large (large) with Train Size 235, Split 3...


Map: 100%|██████████| 235/235 [00:00<00:00, 8309.61 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6209.67 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9224.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.11it/s, loss=0.378]


Epoch 1 Loss: 18.2722
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:13<00:00,  2.16it/s, loss=0.179]


Epoch 2 Loss: 7.8781
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.19it/s, loss=0.372] 


Epoch 3 Loss: 5.1773
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.13it/s, loss=0.053] 


Epoch 4 Loss: 3.5136
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:13<00:00,  2.23it/s, loss=0.0592]


Epoch 5 Loss: 2.6167
Test Metrics: Precision=0.9263, Recall=0.9263, F1=0.9263

Fine-tuning roberta-large (large) with Train Size 235, Split 4...


Map: 100%|██████████| 235/235 [00:00<00:00, 8769.46 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6564.51 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9366.33 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.10it/s, loss=0.33] 


Epoch 1 Loss: 18.1275
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:13<00:00,  2.21it/s, loss=0.0975]


Epoch 2 Loss: 9.0554
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.13it/s, loss=0.093] 


Epoch 3 Loss: 6.1228
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s, loss=0.106] 


Epoch 4 Loss: 4.7121
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:13<00:00,  2.22it/s, loss=0.0877]


Epoch 5 Loss: 3.7277
Test Metrics: Precision=0.9256, Recall=0.9256, F1=0.9256

Fine-tuning roberta-large (large) with Train Size 235, Split 5...


Map: 100%|██████████| 235/235 [00:00<00:00, 8235.67 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6286.91 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9172.90 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.02it/s, loss=0.298]


Epoch 1 Loss: 20.8266
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.00it/s, loss=0.619]


Epoch 2 Loss: 9.8112
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.08it/s, loss=0.237]


Epoch 3 Loss: 6.8182
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.04it/s, loss=0.169] 


Epoch 4 Loss: 4.7342
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:15<00:00,  2.00it/s, loss=0.118] 


Epoch 5 Loss: 3.5190
Test Metrics: Precision=0.9303, Recall=0.9303, F1=0.9303

Fine-tuning roberta-large (large) with Train Size 240, Split 1...


Map: 100%|██████████| 240/240 [00:00<00:00, 8273.81 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6914.64 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9192.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.20it/s, loss=0.348]


Epoch 1 Loss: 19.8129
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:13<00:00,  2.17it/s, loss=0.334]


Epoch 2 Loss: 8.5848
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.23it/s, loss=0.193] 


Epoch 3 Loss: 5.5881
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:12<00:00,  2.35it/s, loss=0.0871]


Epoch 4 Loss: 4.4469
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:13<00:00,  2.30it/s, loss=0.119] 


Epoch 5 Loss: 3.1632
Test Metrics: Precision=0.9290, Recall=0.9290, F1=0.9290

Fine-tuning roberta-large (large) with Train Size 240, Split 2...


Map: 100%|██████████| 240/240 [00:00<00:00, 8573.44 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6049.30 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9450.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.20it/s, loss=0.264]


Epoch 1 Loss: 19.5502
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.10it/s, loss=0.233]


Epoch 2 Loss: 10.0850
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:13<00:00,  2.20it/s, loss=0.152]


Epoch 3 Loss: 7.1240
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s, loss=0.144]


Epoch 4 Loss: 5.2095
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:14<00:00,  2.12it/s, loss=0.135] 


Epoch 5 Loss: 4.0731
Test Metrics: Precision=0.9272, Recall=0.9272, F1=0.9272

Fine-tuning roberta-large (large) with Train Size 240, Split 3...


Map: 100%|██████████| 240/240 [00:00<00:00, 8370.47 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5011.24 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9248.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:14<00:00,  2.05it/s, loss=0.574]


Epoch 1 Loss: 21.3044
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.06it/s, loss=0.221]


Epoch 2 Loss: 9.3418
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.12it/s, loss=0.239]


Epoch 3 Loss: 6.2584
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.09it/s, loss=0.16]  


Epoch 4 Loss: 4.6517
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:14<00:00,  2.07it/s, loss=0.117] 


Epoch 5 Loss: 3.5323
Test Metrics: Precision=0.9268, Recall=0.9268, F1=0.9268

Fine-tuning roberta-large (large) with Train Size 240, Split 4...


Map: 100%|██████████| 240/240 [00:00<00:00, 8370.61 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5877.81 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9362.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:13<00:00,  2.15it/s, loss=0.344]


Epoch 1 Loss: 18.4312
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:14<00:00,  2.12it/s, loss=0.193]


Epoch 2 Loss: 8.2962
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:14<00:00,  2.11it/s, loss=0.132] 


Epoch 3 Loss: 5.4571
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:14<00:00,  2.11it/s, loss=0.168] 


Epoch 4 Loss: 3.9115
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:14<00:00,  2.06it/s, loss=0.191] 


Epoch 5 Loss: 3.2582
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 240, Split 5...


Map: 100%|██████████| 240/240 [00:00<00:00, 8407.31 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6127.92 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9160.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 30/30 [00:15<00:00,  1.91it/s, loss=0.379]


Epoch 1 Loss: 19.6991
Epoch 2/5


Training Epoch 2: 100%|██████████| 30/30 [00:15<00:00,  1.90it/s, loss=0.544]


Epoch 2 Loss: 11.2846
Epoch 3/5


Training Epoch 3: 100%|██████████| 30/30 [00:15<00:00,  1.99it/s, loss=0.506] 


Epoch 3 Loss: 7.9032
Epoch 4/5


Training Epoch 4: 100%|██████████| 30/30 [00:15<00:00,  1.96it/s, loss=0.256]


Epoch 4 Loss: 6.1400
Epoch 5/5


Training Epoch 5: 100%|██████████| 30/30 [00:15<00:00,  1.91it/s, loss=0.167]


Epoch 5 Loss: 5.0832
Test Metrics: Precision=0.9265, Recall=0.9265, F1=0.9265

Fine-tuning roberta-large (large) with Train Size 245, Split 1...


Map: 100%|██████████| 245/245 [00:00<00:00, 8254.78 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6066.32 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9456.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 31/31 [00:13<00:00,  2.28it/s, loss=0.424]


Epoch 1 Loss: 18.9395
Epoch 2/5


Training Epoch 2: 100%|██████████| 31/31 [00:13<00:00,  2.29it/s, loss=0.445]


Epoch 2 Loss: 9.6352
Epoch 3/5


Training Epoch 3: 100%|██████████| 31/31 [00:13<00:00,  2.23it/s, loss=0.169]


Epoch 3 Loss: 6.0995
Epoch 4/5


Training Epoch 4: 100%|██████████| 31/31 [00:13<00:00,  2.23it/s, loss=0.17]  


Epoch 4 Loss: 4.2145
Epoch 5/5


Training Epoch 5: 100%|██████████| 31/31 [00:13<00:00,  2.25it/s, loss=0.114] 


Epoch 5 Loss: 3.2621
Test Metrics: Precision=0.9270, Recall=0.9270, F1=0.9270

Fine-tuning roberta-large (large) with Train Size 245, Split 2...


Map: 100%|██████████| 245/245 [00:00<00:00, 8171.09 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 5527.43 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9628.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 31/31 [00:13<00:00,  2.23it/s, loss=0.354]


Epoch 1 Loss: 19.9169
Epoch 2/5


Training Epoch 2: 100%|██████████| 31/31 [00:13<00:00,  2.22it/s, loss=0.529]


Epoch 2 Loss: 9.3395
Epoch 3/5


Training Epoch 3: 100%|██████████| 31/31 [00:14<00:00,  2.18it/s, loss=0.357]


Epoch 3 Loss: 6.5037
Epoch 4/5


Training Epoch 4: 100%|██████████| 31/31 [00:14<00:00,  2.10it/s, loss=0.187] 


Epoch 4 Loss: 4.4526
Epoch 5/5


Training Epoch 5: 100%|██████████| 31/31 [00:13<00:00,  2.24it/s, loss=0.0834]


Epoch 5 Loss: 3.2559
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning roberta-large (large) with Train Size 245, Split 3...


Map: 100%|██████████| 245/245 [00:00<00:00, 8095.64 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6101.44 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 4296.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 31/31 [00:14<00:00,  2.15it/s, loss=0.444]


Epoch 1 Loss: 22.4410
Epoch 2/5


Training Epoch 2: 100%|██████████| 31/31 [00:14<00:00,  2.09it/s, loss=0.24] 


Epoch 2 Loss: 8.8410
Epoch 3/5


Training Epoch 3: 100%|██████████| 31/31 [00:15<00:00,  2.04it/s, loss=0.166] 


Epoch 3 Loss: 5.7718
Epoch 4/5


Training Epoch 4: 100%|██████████| 31/31 [00:14<00:00,  2.15it/s, loss=0.114] 


Epoch 4 Loss: 3.6023
Epoch 5/5


Training Epoch 5: 100%|██████████| 31/31 [00:14<00:00,  2.15it/s, loss=0.0754]


Epoch 5 Loss: 2.6382
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 245, Split 4...


Map: 100%|██████████| 245/245 [00:00<00:00, 8587.42 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6096.91 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9069.36 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 31/31 [00:14<00:00,  2.13it/s, loss=0.262]


Epoch 1 Loss: 21.4200
Epoch 2/5


Training Epoch 2: 100%|██████████| 31/31 [00:14<00:00,  2.13it/s, loss=0.37] 


Epoch 2 Loss: 9.6125
Epoch 3/5


Training Epoch 3: 100%|██████████| 31/31 [00:14<00:00,  2.18it/s, loss=0.166]


Epoch 3 Loss: 6.5611
Epoch 4/5


Training Epoch 4: 100%|██████████| 31/31 [00:14<00:00,  2.15it/s, loss=0.137] 


Epoch 4 Loss: 4.7495
Epoch 5/5


Training Epoch 5: 100%|██████████| 31/31 [00:14<00:00,  2.10it/s, loss=0.0825]


Epoch 5 Loss: 3.7129
Test Metrics: Precision=0.9279, Recall=0.9279, F1=0.9279

Fine-tuning roberta-large (large) with Train Size 245, Split 5...


Map: 100%|██████████| 245/245 [00:00<00:00, 8474.04 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6578.56 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9263.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 31/31 [00:15<00:00,  1.94it/s, loss=0.444]


Epoch 1 Loss: 21.1950
Epoch 2/5


Training Epoch 2: 100%|██████████| 31/31 [00:15<00:00,  2.01it/s, loss=0.267]


Epoch 2 Loss: 10.2044
Epoch 3/5


Training Epoch 3: 100%|██████████| 31/31 [00:15<00:00,  1.97it/s, loss=0.346]


Epoch 3 Loss: 7.0333
Epoch 4/5


Training Epoch 4: 100%|██████████| 31/31 [00:16<00:00,  1.93it/s, loss=0.0934]


Epoch 4 Loss: 4.7483
Epoch 5/5


Training Epoch 5: 100%|██████████| 31/31 [00:15<00:00,  1.99it/s, loss=0.102] 


Epoch 5 Loss: 3.7987
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning roberta-large (large) with Train Size 250, Split 1...


Map: 100%|██████████| 250/250 [00:00<00:00, 8610.91 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6710.24 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 4435.78 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.23it/s, loss=0.565]


Epoch 1 Loss: 20.0371
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.28it/s, loss=0.359]


Epoch 2 Loss: 10.1964
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.25it/s, loss=0.121]


Epoch 3 Loss: 6.0757
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:13<00:00,  2.37it/s, loss=0.187] 


Epoch 4 Loss: 4.4739
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:13<00:00,  2.33it/s, loss=0.0836]


Epoch 5 Loss: 3.5608
Test Metrics: Precision=0.9280, Recall=0.9280, F1=0.9280

Fine-tuning roberta-large (large) with Train Size 250, Split 2...


Map: 100%|██████████| 250/250 [00:00<00:00, 8391.76 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6376.45 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9440.26 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.19it/s, loss=0.367]


Epoch 1 Loss: 20.6956
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.21it/s, loss=0.128]


Epoch 2 Loss: 9.3876
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.16it/s, loss=0.0266]


Epoch 3 Loss: 5.8256
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.14it/s, loss=0.141] 


Epoch 4 Loss: 3.8454
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:14<00:00,  2.24it/s, loss=0.117] 


Epoch 5 Loss: 2.7522
Test Metrics: Precision=0.9295, Recall=0.9295, F1=0.9295

Fine-tuning roberta-large (large) with Train Size 250, Split 3...


Map: 100%|██████████| 250/250 [00:00<00:00, 8621.53 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6972.61 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9606.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.09it/s, loss=0.351]


Epoch 1 Loss: 22.1069
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s, loss=0.455]


Epoch 2 Loss: 10.4940
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.14it/s, loss=0.216] 


Epoch 3 Loss: 7.2019
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.16it/s, loss=0.248] 


Epoch 4 Loss: 5.0429
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:14<00:00,  2.18it/s, loss=0.319] 


Epoch 5 Loss: 4.0903
Test Metrics: Precision=0.9281, Recall=0.9281, F1=0.9281

Fine-tuning roberta-large (large) with Train Size 250, Split 4...


Map: 100%|██████████| 250/250 [00:00<00:00, 8386.80 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6672.03 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9102.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.20it/s, loss=0.375]


Epoch 1 Loss: 19.0531
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.16it/s, loss=0.224]


Epoch 2 Loss: 8.6723
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.11it/s, loss=0.2]   


Epoch 3 Loss: 6.2275
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.10it/s, loss=0.057] 


Epoch 4 Loss: 4.5620
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:14<00:00,  2.16it/s, loss=0.136] 


Epoch 5 Loss: 3.8461
Test Metrics: Precision=0.9262, Recall=0.9262, F1=0.9262

Fine-tuning roberta-large (large) with Train Size 250, Split 5...


Map: 100%|██████████| 250/250 [00:00<00:00, 8436.93 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6445.64 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9228.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:16<00:00,  1.90it/s, loss=0.546]


Epoch 1 Loss: 24.5300
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.11it/s, loss=0.485]


Epoch 2 Loss: 12.7483
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:16<00:00,  1.95it/s, loss=0.45] 


Epoch 3 Loss: 8.6222
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:16<00:00,  1.99it/s, loss=0.0361]


Epoch 4 Loss: 5.7320
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:15<00:00,  2.04it/s, loss=0.253] 


Epoch 5 Loss: 4.6700
Test Metrics: Precision=0.9265, Recall=0.9265, F1=0.9265

Fine-tuning roberta-large (large) with Train Size 255, Split 1...


Map: 100%|██████████| 255/255 [00:00<00:00, 8542.37 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6483.28 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9452.62 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:13<00:00,  2.32it/s, loss=0.386]


Epoch 1 Loss: 20.7201
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.23it/s, loss=0.197]


Epoch 2 Loss: 9.4702
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.22it/s, loss=0.107] 


Epoch 3 Loss: 6.0898
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s, loss=0.204] 


Epoch 4 Loss: 4.2921
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:14<00:00,  2.22it/s, loss=0.151] 


Epoch 5 Loss: 3.1986
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning roberta-large (large) with Train Size 255, Split 2...


Map: 100%|██████████| 255/255 [00:00<00:00, 8580.89 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 7154.64 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9326.00 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.14it/s, loss=0.37] 


Epoch 1 Loss: 21.0336
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:14<00:00,  2.14it/s, loss=0.311]


Epoch 2 Loss: 10.0260
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s, loss=0.186]


Epoch 3 Loss: 6.8887
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:14<00:00,  2.16it/s, loss=0.0823]


Epoch 4 Loss: 4.7385
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:14<00:00,  2.14it/s, loss=0.0756]


Epoch 5 Loss: 3.4457
Test Metrics: Precision=0.9297, Recall=0.9297, F1=0.9297

Fine-tuning roberta-large (large) with Train Size 255, Split 3...


Map: 100%|██████████| 255/255 [00:00<00:00, 8703.22 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6156.20 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9565.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:14<00:00,  2.15it/s, loss=0.427]


Epoch 1 Loss: 18.8356
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.06it/s, loss=0.323]


Epoch 2 Loss: 8.3565
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:14<00:00,  2.17it/s, loss=0.201]


Epoch 3 Loss: 5.7327
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.09it/s, loss=0.113] 


Epoch 4 Loss: 3.5991
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:15<00:00,  2.09it/s, loss=0.111] 


Epoch 5 Loss: 2.6722
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 255, Split 4...


Map: 100%|██████████| 255/255 [00:00<00:00, 8729.93 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6752.83 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9328.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:15<00:00,  2.08it/s, loss=0.448]


Epoch 1 Loss: 19.6206
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:15<00:00,  2.08it/s, loss=0.329]


Epoch 2 Loss: 9.8572
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:15<00:00,  2.05it/s, loss=0.253]


Epoch 3 Loss: 7.2011
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:15<00:00,  2.09it/s, loss=0.253] 


Epoch 4 Loss: 5.3033
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:15<00:00,  2.04it/s, loss=0.11]  


Epoch 5 Loss: 4.4410
Test Metrics: Precision=0.9258, Recall=0.9258, F1=0.9258

Fine-tuning roberta-large (large) with Train Size 255, Split 5...


Map: 100%|██████████| 255/255 [00:00<00:00, 8383.22 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 7316.90 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9151.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 32/32 [00:16<00:00,  1.99it/s, loss=0.416]


Epoch 1 Loss: 20.0292
Epoch 2/5


Training Epoch 2: 100%|██████████| 32/32 [00:16<00:00,  1.94it/s, loss=0.352]


Epoch 2 Loss: 9.7569
Epoch 3/5


Training Epoch 3: 100%|██████████| 32/32 [00:16<00:00,  1.93it/s, loss=0.111]


Epoch 3 Loss: 6.7499
Epoch 4/5


Training Epoch 4: 100%|██████████| 32/32 [00:16<00:00,  1.97it/s, loss=0.0778]


Epoch 4 Loss: 4.5308
Epoch 5/5


Training Epoch 5: 100%|██████████| 32/32 [00:15<00:00,  2.06it/s, loss=0.0796]


Epoch 5 Loss: 3.3111
Test Metrics: Precision=0.9331, Recall=0.9331, F1=0.9331

Fine-tuning roberta-large (large) with Train Size 260, Split 1...


Map: 100%|██████████| 260/260 [00:00<00:00, 8585.89 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6506.48 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9537.09 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 33/33 [00:14<00:00,  2.30it/s, loss=0.433]


Epoch 1 Loss: 23.0403
Epoch 2/5


Training Epoch 2: 100%|██████████| 33/33 [00:14<00:00,  2.25it/s, loss=0.658]


Epoch 2 Loss: 11.3173
Epoch 3/5


Training Epoch 3: 100%|██████████| 33/33 [00:13<00:00,  2.36it/s, loss=0.214] 


Epoch 3 Loss: 7.4433
Epoch 4/5


Training Epoch 4: 100%|██████████| 33/33 [00:14<00:00,  2.27it/s, loss=0.156] 


Epoch 4 Loss: 5.3867
Epoch 5/5


Training Epoch 5: 100%|██████████| 33/33 [00:14<00:00,  2.26it/s, loss=0.11]  


Epoch 5 Loss: 4.1802
Test Metrics: Precision=0.9282, Recall=0.9282, F1=0.9282

Fine-tuning roberta-large (large) with Train Size 260, Split 2...


Map: 100%|██████████| 260/260 [00:00<00:00, 8731.98 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6214.14 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9001.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 33/33 [00:15<00:00,  2.17it/s, loss=0.339]


Epoch 1 Loss: 21.0068
Epoch 2/5


Training Epoch 2: 100%|██████████| 33/33 [00:14<00:00,  2.26it/s, loss=0.184]


Epoch 2 Loss: 9.5348
Epoch 3/5


Training Epoch 3: 100%|██████████| 33/33 [00:15<00:00,  2.16it/s, loss=0.227]


Epoch 3 Loss: 7.2419
Epoch 4/5


Training Epoch 4: 100%|██████████| 33/33 [00:14<00:00,  2.24it/s, loss=0.227] 


Epoch 4 Loss: 4.9979
Epoch 5/5


Training Epoch 5: 100%|██████████| 33/33 [00:15<00:00,  2.17it/s, loss=0.147] 


Epoch 5 Loss: 3.7757
Test Metrics: Precision=0.9276, Recall=0.9276, F1=0.9276

Fine-tuning roberta-large (large) with Train Size 260, Split 3...


Map: 100%|██████████| 260/260 [00:00<00:00, 8627.39 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 7112.00 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9398.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 33/33 [00:15<00:00,  2.11it/s, loss=0.623]


Epoch 1 Loss: 20.5317
Epoch 2/5


Training Epoch 2: 100%|██████████| 33/33 [00:15<00:00,  2.16it/s, loss=0.198]


Epoch 2 Loss: 9.5456
Epoch 3/5


Training Epoch 3: 100%|██████████| 33/33 [00:15<00:00,  2.17it/s, loss=0.289] 


Epoch 3 Loss: 6.5541
Epoch 4/5


Training Epoch 4: 100%|██████████| 33/33 [00:15<00:00,  2.12it/s, loss=0.13]  


Epoch 4 Loss: 4.5270
Epoch 5/5


Training Epoch 5: 100%|██████████| 33/33 [00:15<00:00,  2.14it/s, loss=0.0597]


Epoch 5 Loss: 3.6191
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning roberta-large (large) with Train Size 260, Split 4...


Map: 100%|██████████| 260/260 [00:00<00:00, 8565.25 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6517.95 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9043.36 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 33/33 [00:15<00:00,  2.08it/s, loss=0.4]  


Epoch 1 Loss: 20.4769
Epoch 2/5


Training Epoch 2: 100%|██████████| 33/33 [00:15<00:00,  2.09it/s, loss=0.22] 


Epoch 2 Loss: 9.4797
Epoch 3/5


Training Epoch 3: 100%|██████████| 33/33 [00:15<00:00,  2.15it/s, loss=0.13]  


Epoch 3 Loss: 6.1594
Epoch 4/5


Training Epoch 4: 100%|██████████| 33/33 [00:15<00:00,  2.15it/s, loss=0.143] 


Epoch 4 Loss: 4.4308
Epoch 5/5


Training Epoch 5: 100%|██████████| 33/33 [00:15<00:00,  2.09it/s, loss=0.0759]


Epoch 5 Loss: 3.5301
Test Metrics: Precision=0.9281, Recall=0.9281, F1=0.9281

Fine-tuning roberta-large (large) with Train Size 260, Split 5...


Map: 100%|██████████| 260/260 [00:00<00:00, 8596.44 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6480.58 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9301.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 33/33 [00:17<00:00,  1.93it/s, loss=0.409]


Epoch 1 Loss: 22.1262
Epoch 2/5


Training Epoch 2: 100%|██████████| 33/33 [00:16<00:00,  2.02it/s, loss=0.231]


Epoch 2 Loss: 9.9270
Epoch 3/5


Training Epoch 3: 100%|██████████| 33/33 [00:16<00:00,  1.96it/s, loss=0.136] 


Epoch 3 Loss: 6.0094
Epoch 4/5


Training Epoch 4: 100%|██████████| 33/33 [00:17<00:00,  1.92it/s, loss=0.0957]


Epoch 4 Loss: 4.1641
Epoch 5/5


Training Epoch 5: 100%|██████████| 33/33 [00:16<00:00,  1.98it/s, loss=0.0686]


Epoch 5 Loss: 2.8735
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 265, Split 1...


Map: 100%|██████████| 265/265 [00:00<00:00, 8572.35 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6834.68 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9180.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:14<00:00,  2.27it/s, loss=0.0113]


Epoch 1 Loss: 17.7091
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:14<00:00,  2.30it/s, loss=0.62] 


Epoch 2 Loss: 8.4114
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.20it/s, loss=0.112] 


Epoch 3 Loss: 5.6696
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:14<00:00,  2.34it/s, loss=0.232] 


Epoch 4 Loss: 4.0027
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:14<00:00,  2.29it/s, loss=0.14]  


Epoch 5 Loss: 2.9339
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning roberta-large (large) with Train Size 265, Split 2...


Map: 100%|██████████| 265/265 [00:00<00:00, 8050.08 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6896.17 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 8989.99 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:14<00:00,  2.28it/s, loss=0.219]


Epoch 1 Loss: 22.4220
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.23it/s, loss=0.0217]


Epoch 2 Loss: 9.9534
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:14<00:00,  2.32it/s, loss=0.0679]


Epoch 3 Loss: 7.2631
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:15<00:00,  2.19it/s, loss=0.283] 


Epoch 4 Loss: 5.6519
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.21it/s, loss=0.0552]


Epoch 5 Loss: 4.0964
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning roberta-large (large) with Train Size 265, Split 3...


Map: 100%|██████████| 265/265 [00:00<00:00, 8849.45 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 7244.28 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9380.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.16it/s, loss=0.0757]


Epoch 1 Loss: 18.2784
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.20it/s, loss=0.02] 


Epoch 2 Loss: 8.3142
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.25it/s, loss=0.366] 


Epoch 3 Loss: 5.6434
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:15<00:00,  2.23it/s, loss=0.0448]


Epoch 4 Loss: 3.8328
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.22it/s, loss=0.153] 


Epoch 5 Loss: 2.5574
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning roberta-large (large) with Train Size 265, Split 4...


Map: 100%|██████████| 265/265 [00:00<00:00, 8938.55 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 5846.72 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9375.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.718]


Epoch 1 Loss: 22.8249
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:16<00:00,  2.08it/s, loss=0.286]


Epoch 2 Loss: 10.2425
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.20it/s, loss=0.517] 


Epoch 3 Loss: 7.4723
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.12it/s, loss=0.0904]


Epoch 4 Loss: 5.1891
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.16it/s, loss=0.261] 


Epoch 5 Loss: 4.3287
Test Metrics: Precision=0.9268, Recall=0.9268, F1=0.9268

Fine-tuning roberta-large (large) with Train Size 265, Split 5...


Map: 100%|██████████| 265/265 [00:00<00:00, 8673.97 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 7288.94 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9475.59 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:16<00:00,  2.06it/s, loss=0.972]


Epoch 1 Loss: 21.0280
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:16<00:00,  2.10it/s, loss=0.0996]


Epoch 2 Loss: 10.2241
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.02it/s, loss=0.155]


Epoch 3 Loss: 6.8940
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.11it/s, loss=0.137] 


Epoch 4 Loss: 4.8538
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:17<00:00,  2.00it/s, loss=0.0113]


Epoch 5 Loss: 3.5067
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 270, Split 1...


Map: 100%|██████████| 270/270 [00:00<00:00, 8482.23 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 5402.19 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9211.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.20it/s, loss=0.366]


Epoch 1 Loss: 18.6882
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.27it/s, loss=0.427]


Epoch 2 Loss: 9.1234
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.26it/s, loss=0.163] 


Epoch 3 Loss: 5.6498
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:15<00:00,  2.16it/s, loss=0.112] 


Epoch 4 Loss: 3.9267
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.20it/s, loss=0.107] 


Epoch 5 Loss: 2.9288
Test Metrics: Precision=0.9285, Recall=0.9285, F1=0.9285

Fine-tuning roberta-large (large) with Train Size 270, Split 2...


Map: 100%|██████████| 270/270 [00:00<00:00, 8562.46 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6161.05 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9100.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:15<00:00,  2.25it/s, loss=0.578]


Epoch 1 Loss: 21.3200
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.18it/s, loss=0.155]


Epoch 2 Loss: 11.0105
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:15<00:00,  2.13it/s, loss=0.188]


Epoch 3 Loss: 8.1207
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:15<00:00,  2.14it/s, loss=0.124] 


Epoch 4 Loss: 5.9252
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.17it/s, loss=0.135] 


Epoch 5 Loss: 4.7917
Test Metrics: Precision=0.9254, Recall=0.9254, F1=0.9254

Fine-tuning roberta-large (large) with Train Size 270, Split 3...


Map: 100%|██████████| 270/270 [00:00<00:00, 8617.12 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 7395.43 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9316.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:16<00:00,  2.11it/s, loss=0.288]


Epoch 1 Loss: 20.0505
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:15<00:00,  2.13it/s, loss=0.438]


Epoch 2 Loss: 9.5665
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:16<00:00,  2.11it/s, loss=0.231] 


Epoch 3 Loss: 6.5757
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.08it/s, loss=0.115] 


Epoch 4 Loss: 4.0860
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.19it/s, loss=0.0681]


Epoch 5 Loss: 2.8589
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 270, Split 4...


Map: 100%|██████████| 270/270 [00:00<00:00, 8558.25 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6522.46 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 8990.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:16<00:00,  2.10it/s, loss=0.265]


Epoch 1 Loss: 21.9830
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:16<00:00,  2.11it/s, loss=0.136]


Epoch 2 Loss: 10.0226
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:17<00:00,  1.98it/s, loss=0.0341]


Epoch 3 Loss: 6.4716
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:16<00:00,  2.10it/s, loss=0.0804]


Epoch 4 Loss: 4.7386
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:15<00:00,  2.13it/s, loss=0.0583]


Epoch 5 Loss: 3.2379
Test Metrics: Precision=0.9291, Recall=0.9291, F1=0.9291

Fine-tuning roberta-large (large) with Train Size 270, Split 5...


Map: 100%|██████████| 270/270 [00:00<00:00, 7982.17 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6377.37 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 8937.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 34/34 [00:17<00:00,  2.00it/s, loss=0.582]


Epoch 1 Loss: 22.7089
Epoch 2/5


Training Epoch 2: 100%|██████████| 34/34 [00:17<00:00,  1.99it/s, loss=0.364]


Epoch 2 Loss: 10.5090
Epoch 3/5


Training Epoch 3: 100%|██████████| 34/34 [00:17<00:00,  1.94it/s, loss=0.306]


Epoch 3 Loss: 7.3189
Epoch 4/5


Training Epoch 4: 100%|██████████| 34/34 [00:17<00:00,  1.94it/s, loss=0.107] 


Epoch 4 Loss: 5.2350
Epoch 5/5


Training Epoch 5: 100%|██████████| 34/34 [00:17<00:00,  1.92it/s, loss=0.0941]


Epoch 5 Loss: 4.0718
Test Metrics: Precision=0.9303, Recall=0.9303, F1=0.9303

Fine-tuning roberta-large (large) with Train Size 275, Split 1...


Map: 100%|██████████| 275/275 [00:00<00:00, 8485.75 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 7293.29 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9369.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:15<00:00,  2.30it/s, loss=0.242]


Epoch 1 Loss: 21.2119
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.24it/s, loss=0.171]


Epoch 2 Loss: 9.3991
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:15<00:00,  2.25it/s, loss=0.222] 


Epoch 3 Loss: 6.2341
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:15<00:00,  2.26it/s, loss=0.0484]


Epoch 4 Loss: 4.1272
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:15<00:00,  2.29it/s, loss=0.145] 


Epoch 5 Loss: 3.1440
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning roberta-large (large) with Train Size 275, Split 2...


Map: 100%|██████████| 275/275 [00:00<00:00, 8823.36 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6221.83 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9267.11 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.13it/s, loss=0.264]


Epoch 1 Loss: 20.4841
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.22it/s, loss=0.159]


Epoch 2 Loss: 10.4669
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.18it/s, loss=0.0994]


Epoch 3 Loss: 7.2714
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:15<00:00,  2.23it/s, loss=0.0396]


Epoch 4 Loss: 5.2530
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:16<00:00,  2.16it/s, loss=0.109] 


Epoch 5 Loss: 4.5011
Test Metrics: Precision=0.9272, Recall=0.9272, F1=0.9272

Fine-tuning roberta-large (large) with Train Size 275, Split 3...


Map: 100%|██████████| 275/275 [00:00<00:00, 8755.38 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6563.11 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9275.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.15it/s, loss=0.416]


Epoch 1 Loss: 22.2649
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.18it/s, loss=0.305]


Epoch 2 Loss: 10.2605
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.17it/s, loss=0.205]


Epoch 3 Loss: 7.0671
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, loss=0.0805]


Epoch 4 Loss: 5.3470
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:16<00:00,  2.06it/s, loss=0.0502]


Epoch 5 Loss: 3.6209
Test Metrics: Precision=0.9314, Recall=0.9314, F1=0.9314

Fine-tuning roberta-large (large) with Train Size 275, Split 4...


Map: 100%|██████████| 275/275 [00:00<00:00, 8759.43 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 8484.56 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9152.23 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.07it/s, loss=0.301]


Epoch 1 Loss: 21.8126
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.14it/s, loss=0.182]


Epoch 2 Loss: 9.7068
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.07it/s, loss=0.222]


Epoch 3 Loss: 6.3662
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.08it/s, loss=0.159] 


Epoch 4 Loss: 4.4037
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:16<00:00,  2.15it/s, loss=0.0839]


Epoch 5 Loss: 3.3402
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 275, Split 5...


Map: 100%|██████████| 275/275 [00:00<00:00, 8357.61 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6178.17 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9295.20 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:17<00:00,  1.99it/s, loss=0.293]


Epoch 1 Loss: 21.1128
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:17<00:00,  2.05it/s, loss=0.172]


Epoch 2 Loss: 9.7169
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:18<00:00,  1.91it/s, loss=0.169] 


Epoch 3 Loss: 6.5327
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:17<00:00,  1.94it/s, loss=0.056] 


Epoch 4 Loss: 4.5699
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:18<00:00,  1.91it/s, loss=0.0102]


Epoch 5 Loss: 3.2218
Test Metrics: Precision=0.9353, Recall=0.9353, F1=0.9353

Fine-tuning roberta-large (large) with Train Size 280, Split 1...


Map: 100%|██████████| 280/280 [00:00<00:00, 8246.94 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6914.57 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9226.37 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:15<00:00,  2.21it/s, loss=0.36] 


Epoch 1 Loss: 21.5640
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.17it/s, loss=0.155]


Epoch 2 Loss: 9.9841
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.16it/s, loss=0.154] 


Epoch 3 Loss: 6.1996
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:15<00:00,  2.23it/s, loss=0.0888]


Epoch 4 Loss: 4.6307
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:15<00:00,  2.26it/s, loss=0.094] 


Epoch 5 Loss: 3.3814
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 280, Split 2...


Map: 100%|██████████| 280/280 [00:00<00:00, 8491.60 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6642.19 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 8600.16 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.07it/s, loss=0.553]


Epoch 1 Loss: 22.4573
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:15<00:00,  2.20it/s, loss=0.191]


Epoch 2 Loss: 9.6611
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.11it/s, loss=0.285]


Epoch 3 Loss: 6.6566
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.13it/s, loss=0.0638]


Epoch 4 Loss: 4.7119
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:16<00:00,  2.10it/s, loss=0.122] 


Epoch 5 Loss: 3.4948
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning roberta-large (large) with Train Size 280, Split 3...


Map: 100%|██████████| 280/280 [00:00<00:00, 8870.46 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 5691.60 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9426.15 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:16<00:00,  2.09it/s, loss=0.289]


Epoch 1 Loss: 20.8409
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:16<00:00,  2.11it/s, loss=0.25] 


Epoch 2 Loss: 13.0033
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:15<00:00,  2.24it/s, loss=0.185]


Epoch 3 Loss: 8.0362
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:16<00:00,  2.12it/s, loss=0.228]


Epoch 4 Loss: 6.0398
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:16<00:00,  2.09it/s, loss=0.243] 


Epoch 5 Loss: 4.8552
Test Metrics: Precision=0.9278, Recall=0.9278, F1=0.9278

Fine-tuning roberta-large (large) with Train Size 280, Split 4...


Map: 100%|██████████| 280/280 [00:00<00:00, 8558.06 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6218.39 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 3898.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:17<00:00,  2.05it/s, loss=0.268]


Epoch 1 Loss: 20.6117
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:17<00:00,  1.98it/s, loss=0.35] 


Epoch 2 Loss: 9.6616
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:16<00:00,  2.06it/s, loss=0.184]


Epoch 3 Loss: 7.0119
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:17<00:00,  2.02it/s, loss=0.151] 


Epoch 4 Loss: 5.0437
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:16<00:00,  2.06it/s, loss=0.0726]


Epoch 5 Loss: 4.0515
Test Metrics: Precision=0.9280, Recall=0.9280, F1=0.9280

Fine-tuning roberta-large (large) with Train Size 280, Split 5...


Map: 100%|██████████| 280/280 [00:00<00:00, 8296.17 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6413.66 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9272.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 35/35 [00:18<00:00,  1.87it/s, loss=0.428]


Epoch 1 Loss: 21.2052
Epoch 2/5


Training Epoch 2: 100%|██████████| 35/35 [00:17<00:00,  1.94it/s, loss=0.283]


Epoch 2 Loss: 10.0920
Epoch 3/5


Training Epoch 3: 100%|██████████| 35/35 [00:17<00:00,  1.96it/s, loss=0.204]


Epoch 3 Loss: 6.8718
Epoch 4/5


Training Epoch 4: 100%|██████████| 35/35 [00:17<00:00,  1.99it/s, loss=0.108] 


Epoch 4 Loss: 4.6295
Epoch 5/5


Training Epoch 5: 100%|██████████| 35/35 [00:17<00:00,  1.95it/s, loss=0.121] 


Epoch 5 Loss: 3.3299
Test Metrics: Precision=0.9334, Recall=0.9334, F1=0.9334

Fine-tuning roberta-large (large) with Train Size 285, Split 1...


Map: 100%|██████████| 285/285 [00:00<00:00, 8555.94 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 7819.82 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9351.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 36/36 [00:16<00:00,  2.18it/s, loss=0.294]


Epoch 1 Loss: 22.5789
Epoch 2/5


Training Epoch 2: 100%|██████████| 36/36 [00:16<00:00,  2.19it/s, loss=0.353]


Epoch 2 Loss: 10.4589
Epoch 3/5


Training Epoch 3: 100%|██████████| 36/36 [00:17<00:00,  2.12it/s, loss=0.118]


Epoch 3 Loss: 7.4702
Epoch 4/5


Training Epoch 4: 100%|██████████| 36/36 [00:15<00:00,  2.26it/s, loss=0.0891]


Epoch 4 Loss: 5.3359
Epoch 5/5


Training Epoch 5: 100%|██████████| 36/36 [00:15<00:00,  2.26it/s, loss=0.0846]


Epoch 5 Loss: 4.1805
Test Metrics: Precision=0.9311, Recall=0.9311, F1=0.9311

Fine-tuning roberta-large (large) with Train Size 285, Split 2...


Map: 100%|██████████| 285/285 [00:00<00:00, 8516.75 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6453.47 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9234.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 36/36 [00:16<00:00,  2.20it/s, loss=0.22] 


Epoch 1 Loss: 21.2791
Epoch 2/5


Training Epoch 2: 100%|██████████| 36/36 [00:16<00:00,  2.16it/s, loss=0.281]


Epoch 2 Loss: 9.4957
Epoch 3/5


Training Epoch 3: 100%|██████████| 36/36 [00:16<00:00,  2.19it/s, loss=0.142] 


Epoch 3 Loss: 6.3797
Epoch 4/5


Training Epoch 4: 100%|██████████| 36/36 [00:16<00:00,  2.17it/s, loss=0.152] 


Epoch 4 Loss: 4.7820
Epoch 5/5


Training Epoch 5: 100%|██████████| 36/36 [00:16<00:00,  2.17it/s, loss=0.0652]


Epoch 5 Loss: 3.3475
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 285, Split 3...


Map: 100%|██████████| 285/285 [00:00<00:00, 8213.28 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 5019.95 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9691.43 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 36/36 [00:16<00:00,  2.19it/s, loss=0.392]


Epoch 1 Loss: 20.7407
Epoch 2/5


Training Epoch 2: 100%|██████████| 36/36 [00:17<00:00,  2.09it/s, loss=0.164]


Epoch 2 Loss: 9.4976
Epoch 3/5


Training Epoch 3: 100%|██████████| 36/36 [00:16<00:00,  2.19it/s, loss=0.211] 


Epoch 3 Loss: 6.3784
Epoch 4/5


Training Epoch 4: 100%|██████████| 36/36 [00:16<00:00,  2.14it/s, loss=0.0841]


Epoch 4 Loss: 4.2818
Epoch 5/5


Training Epoch 5: 100%|██████████| 36/36 [00:16<00:00,  2.16it/s, loss=0.122] 


Epoch 5 Loss: 3.2065
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 285, Split 4...


Map: 100%|██████████| 285/285 [00:00<00:00, 8707.58 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6400.09 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9292.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 36/36 [00:17<00:00,  2.08it/s, loss=0.465]


Epoch 1 Loss: 20.8067
Epoch 2/5


Training Epoch 2: 100%|██████████| 36/36 [00:16<00:00,  2.13it/s, loss=0.265]


Epoch 2 Loss: 10.5076
Epoch 3/5


Training Epoch 3: 100%|██████████| 36/36 [00:17<00:00,  2.04it/s, loss=0.0811]


Epoch 3 Loss: 6.6846
Epoch 4/5


Training Epoch 4: 100%|██████████| 36/36 [00:17<00:00,  2.06it/s, loss=0.122] 


Epoch 4 Loss: 4.9456
Epoch 5/5


Training Epoch 5: 100%|██████████| 36/36 [00:16<00:00,  2.18it/s, loss=0.0674]


Epoch 5 Loss: 3.6822
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning roberta-large (large) with Train Size 285, Split 5...


Map: 100%|██████████| 285/285 [00:00<00:00, 8658.13 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 7307.60 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9243.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 36/36 [00:17<00:00,  2.00it/s, loss=0.558]


Epoch 1 Loss: 21.8396
Epoch 2/5


Training Epoch 2: 100%|██████████| 36/36 [00:17<00:00,  2.03it/s, loss=0.353]


Epoch 2 Loss: 10.4894
Epoch 3/5


Training Epoch 3: 100%|██████████| 36/36 [00:18<00:00,  1.97it/s, loss=0.178]


Epoch 3 Loss: 7.6606
Epoch 4/5


Training Epoch 4: 100%|██████████| 36/36 [00:18<00:00,  2.00it/s, loss=0.0587]


Epoch 4 Loss: 5.5537
Epoch 5/5


Training Epoch 5: 100%|██████████| 36/36 [00:18<00:00,  1.97it/s, loss=0.111] 


Epoch 5 Loss: 4.3981
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 290, Split 1...


Map: 100%|██████████| 290/290 [00:00<00:00, 8587.30 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6764.44 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9428.75 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:15<00:00,  2.35it/s, loss=0.438]


Epoch 1 Loss: 24.4803
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:15<00:00,  2.31it/s, loss=0.471]


Epoch 2 Loss: 10.4883
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.26it/s, loss=0.0396]


Epoch 3 Loss: 6.9186
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.26it/s, loss=0.0412]


Epoch 4 Loss: 4.7681
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:16<00:00,  2.27it/s, loss=0.141] 


Epoch 5 Loss: 3.6920
Test Metrics: Precision=0.9296, Recall=0.9296, F1=0.9296

Fine-tuning roberta-large (large) with Train Size 290, Split 2...


Map: 100%|██████████| 290/290 [00:00<00:00, 8474.23 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6496.89 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9168.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.369]


Epoch 1 Loss: 23.6976
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.244]


Epoch 2 Loss: 11.8247
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.169]


Epoch 3 Loss: 7.9244
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.20it/s, loss=0.157] 


Epoch 4 Loss: 5.5504
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:16<00:00,  2.18it/s, loss=0.24]  


Epoch 5 Loss: 4.3231
Test Metrics: Precision=0.9236, Recall=0.9236, F1=0.9236

Fine-tuning roberta-large (large) with Train Size 290, Split 3...


Map: 100%|██████████| 290/290 [00:00<00:00, 8764.07 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6087.22 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9342.34 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.17it/s, loss=0.416]


Epoch 1 Loss: 21.0100
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.18it/s, loss=0.22] 


Epoch 2 Loss: 9.9023
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:17<00:00,  2.15it/s, loss=0.0177]


Epoch 3 Loss: 6.7316
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.10it/s, loss=0.119] 


Epoch 4 Loss: 4.7121
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:16<00:00,  2.27it/s, loss=0.11]  


Epoch 5 Loss: 3.5591
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 290, Split 4...


Map: 100%|██████████| 290/290 [00:00<00:00, 8943.21 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 7495.14 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9246.20 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.28] 


Epoch 1 Loss: 21.2290
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.09it/s, loss=0.412]


Epoch 2 Loss: 10.6650
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.18it/s, loss=0.0872]


Epoch 3 Loss: 7.1823
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:16<00:00,  2.18it/s, loss=0.175] 


Epoch 4 Loss: 4.8946
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:17<00:00,  2.14it/s, loss=0.0483]


Epoch 5 Loss: 3.6990
Test Metrics: Precision=0.9305, Recall=0.9305, F1=0.9305

Fine-tuning roberta-large (large) with Train Size 290, Split 5...


Map: 100%|██████████| 290/290 [00:00<00:00, 8496.90 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 7801.60 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9215.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:18<00:00,  2.01it/s, loss=0.37] 


Epoch 1 Loss: 25.0863
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:18<00:00,  2.04it/s, loss=0.329]


Epoch 2 Loss: 10.8786
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:18<00:00,  2.01it/s, loss=0.212] 


Epoch 3 Loss: 7.2374
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:18<00:00,  2.05it/s, loss=0.0469]


Epoch 4 Loss: 4.8456
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:17<00:00,  2.06it/s, loss=0.0556]


Epoch 5 Loss: 3.4150
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 295, Split 1...


Map: 100%|██████████| 295/295 [00:00<00:00, 8221.34 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 5818.85 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9308.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:16<00:00,  2.22it/s, loss=0.292]


Epoch 1 Loss: 20.6679
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:16<00:00,  2.18it/s, loss=0.189] 


Epoch 2 Loss: 9.0645
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.23it/s, loss=0.154] 


Epoch 3 Loss: 6.1643
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:15<00:00,  2.33it/s, loss=0.262] 


Epoch 4 Loss: 4.0962
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:16<00:00,  2.24it/s, loss=0.0805]


Epoch 5 Loss: 3.0659
Test Metrics: Precision=0.9326, Recall=0.9326, F1=0.9326

Fine-tuning roberta-large (large) with Train Size 295, Split 2...


Map: 100%|██████████| 295/295 [00:00<00:00, 8941.27 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7189.33 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9280.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.12it/s, loss=0.52] 


Epoch 1 Loss: 25.0693
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.12it/s, loss=0.284]


Epoch 2 Loss: 10.9040
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.19it/s, loss=0.227]


Epoch 3 Loss: 7.9495
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.17it/s, loss=0.151] 


Epoch 4 Loss: 5.6081
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:16<00:00,  2.22it/s, loss=0.158] 


Epoch 5 Loss: 4.3527
Test Metrics: Precision=0.9302, Recall=0.9302, F1=0.9302

Fine-tuning roberta-large (large) with Train Size 295, Split 3...


Map: 100%|██████████| 295/295 [00:00<00:00, 8145.46 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7561.23 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9157.51 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.16it/s, loss=0.25] 


Epoch 1 Loss: 21.2890
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.14it/s, loss=0.101]


Epoch 2 Loss: 9.7054
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:16<00:00,  2.22it/s, loss=0.195] 


Epoch 3 Loss: 6.3692
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:17<00:00,  2.10it/s, loss=0.11]  


Epoch 4 Loss: 3.9771
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:17<00:00,  2.15it/s, loss=0.0938]


Epoch 5 Loss: 3.0020
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 295, Split 4...


Map: 100%|██████████| 295/295 [00:00<00:00, 8860.15 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 6450.93 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9213.79 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:17<00:00,  2.15it/s, loss=0.411]


Epoch 1 Loss: 22.9195
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:17<00:00,  2.10it/s, loss=0.214]


Epoch 2 Loss: 10.4028
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:17<00:00,  2.07it/s, loss=0.173]


Epoch 3 Loss: 7.9190
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:18<00:00,  2.05it/s, loss=0.151]


Epoch 4 Loss: 6.4321
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:18<00:00,  2.03it/s, loss=0.121] 


Epoch 5 Loss: 5.0473
Test Metrics: Precision=0.9267, Recall=0.9267, F1=0.9267

Fine-tuning roberta-large (large) with Train Size 295, Split 5...


Map: 100%|██████████| 295/295 [00:00<00:00, 8585.88 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7366.75 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9210.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 37/37 [00:18<00:00,  2.00it/s, loss=0.465]


Epoch 1 Loss: 24.5265
Epoch 2/5


Training Epoch 2: 100%|██████████| 37/37 [00:18<00:00,  1.99it/s, loss=0.233]


Epoch 2 Loss: 12.1432
Epoch 3/5


Training Epoch 3: 100%|██████████| 37/37 [00:18<00:00,  1.95it/s, loss=0.154] 


Epoch 3 Loss: 8.0750
Epoch 4/5


Training Epoch 4: 100%|██████████| 37/37 [00:18<00:00,  1.96it/s, loss=0.195] 


Epoch 4 Loss: 5.4487
Epoch 5/5


Training Epoch 5: 100%|██████████| 37/37 [00:18<00:00,  1.96it/s, loss=0.0718]


Epoch 5 Loss: 3.9563
Test Metrics: Precision=0.9330, Recall=0.9330, F1=0.9330

Fine-tuning roberta-large (large) with Train Size 300, Split 1...


Map: 100%|██████████| 300/300 [00:00<00:00, 8461.09 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 5902.48 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9702.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 38/38 [00:16<00:00,  2.24it/s, loss=0.33] 


Epoch 1 Loss: 23.2836
Epoch 2/5


Training Epoch 2: 100%|██████████| 38/38 [00:16<00:00,  2.27it/s, loss=0.21] 


Epoch 2 Loss: 11.9312
Epoch 3/5


Training Epoch 3: 100%|██████████| 38/38 [00:16<00:00,  2.24it/s, loss=0.164]


Epoch 3 Loss: 8.0464
Epoch 4/5


Training Epoch 4: 100%|██████████| 38/38 [00:17<00:00,  2.23it/s, loss=0.0581]


Epoch 4 Loss: 5.5693
Epoch 5/5


Training Epoch 5: 100%|██████████| 38/38 [00:17<00:00,  2.20it/s, loss=0.0825]


Epoch 5 Loss: 4.2192
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning roberta-large (large) with Train Size 300, Split 2...


Map: 100%|██████████| 300/300 [00:00<00:00, 8884.92 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7199.08 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9241.01 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 38/38 [00:16<00:00,  2.28it/s, loss=0.359]


Epoch 1 Loss: 25.5084
Epoch 2/5


Training Epoch 2: 100%|██████████| 38/38 [00:17<00:00,  2.22it/s, loss=0.39] 


Epoch 2 Loss: 12.4061
Epoch 3/5


Training Epoch 3: 100%|██████████| 38/38 [00:17<00:00,  2.20it/s, loss=0.371]


Epoch 3 Loss: 9.1651
Epoch 4/5


Training Epoch 4: 100%|██████████| 38/38 [00:16<00:00,  2.26it/s, loss=0.149]


Epoch 4 Loss: 6.9675
Epoch 5/5


Training Epoch 5: 100%|██████████| 38/38 [00:17<00:00,  2.19it/s, loss=0.124] 


Epoch 5 Loss: 5.3011
Test Metrics: Precision=0.9260, Recall=0.9260, F1=0.9260

Fine-tuning roberta-large (large) with Train Size 300, Split 3...


Map: 100%|██████████| 300/300 [00:00<00:00, 8576.96 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7027.40 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9225.08 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 38/38 [00:17<00:00,  2.21it/s, loss=0.303]


Epoch 1 Loss: 22.6653
Epoch 2/5


Training Epoch 2: 100%|██████████| 38/38 [00:17<00:00,  2.15it/s, loss=0.167]


Epoch 2 Loss: 10.1035
Epoch 3/5


Training Epoch 3: 100%|██████████| 38/38 [00:17<00:00,  2.16it/s, loss=0.153] 


Epoch 3 Loss: 6.3684
Epoch 4/5


Training Epoch 4: 100%|██████████| 38/38 [00:17<00:00,  2.17it/s, loss=0.0924]


Epoch 4 Loss: 4.1447
Epoch 5/5


Training Epoch 5: 100%|██████████| 38/38 [00:17<00:00,  2.15it/s, loss=0.0297]


Epoch 5 Loss: 2.8873
Test Metrics: Precision=0.9330, Recall=0.9330, F1=0.9330

Fine-tuning roberta-large (large) with Train Size 300, Split 4...


Map: 100%|██████████| 300/300 [00:00<00:00, 8343.83 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6106.88 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9281.35 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 38/38 [00:17<00:00,  2.15it/s, loss=0.554]


Epoch 1 Loss: 20.6410
Epoch 2/5


Training Epoch 2: 100%|██████████| 38/38 [00:18<00:00,  2.04it/s, loss=0.161]


Epoch 2 Loss: 10.0378
Epoch 3/5


Training Epoch 3: 100%|██████████| 38/38 [00:17<00:00,  2.14it/s, loss=0.093] 


Epoch 3 Loss: 6.6637
Epoch 4/5


Training Epoch 4: 100%|██████████| 38/38 [00:18<00:00,  2.09it/s, loss=0.109] 


Epoch 4 Loss: 4.7423
Epoch 5/5


Training Epoch 5: 100%|██████████| 38/38 [00:18<00:00,  2.03it/s, loss=0.155] 


Epoch 5 Loss: 3.7883
Test Metrics: Precision=0.9297, Recall=0.9297, F1=0.9297

Fine-tuning roberta-large (large) with Train Size 300, Split 5...


Map: 100%|██████████| 300/300 [00:00<00:00, 8397.68 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6632.88 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9699.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 38/38 [00:19<00:00,  1.97it/s, loss=0.376]


Epoch 1 Loss: 22.5795
Epoch 2/5


Training Epoch 2: 100%|██████████| 38/38 [00:18<00:00,  2.09it/s, loss=0.165]


Epoch 2 Loss: 11.1723
Epoch 3/5


Training Epoch 3: 100%|██████████| 38/38 [00:18<00:00,  2.02it/s, loss=0.0627]


Epoch 3 Loss: 7.4842
Epoch 4/5


Training Epoch 4: 100%|██████████| 38/38 [00:18<00:00,  2.07it/s, loss=0.187] 


Epoch 4 Loss: 5.2690
Epoch 5/5


Training Epoch 5: 100%|██████████| 38/38 [00:18<00:00,  2.08it/s, loss=0.0387]


Epoch 5 Loss: 4.2916
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 305, Split 1...


Map: 100%|██████████| 305/305 [00:00<00:00, 7936.56 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 6337.06 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9307.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.27it/s, loss=0.418]


Epoch 1 Loss: 30.3667
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.21it/s, loss=0.521]


Epoch 2 Loss: 16.3167
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.28it/s, loss=0.0383]


Epoch 3 Loss: 11.0117
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.23it/s, loss=0.197]


Epoch 4 Loss: 8.4326
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:17<00:00,  2.20it/s, loss=0.267] 


Epoch 5 Loss: 7.2053
Test Metrics: Precision=0.9217, Recall=0.9217, F1=0.9217

Fine-tuning roberta-large (large) with Train Size 305, Split 2...


Map: 100%|██████████| 305/305 [00:00<00:00, 8724.13 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 6881.46 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9115.65 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.16it/s, loss=0.102]


Epoch 1 Loss: 23.6937
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.17it/s, loss=0.0964]


Epoch 2 Loss: 10.7400
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.28it/s, loss=0.542] 


Epoch 3 Loss: 8.3187
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.28it/s, loss=0.0911]


Epoch 4 Loss: 6.1674
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:17<00:00,  2.23it/s, loss=0.0168]


Epoch 5 Loss: 4.4738
Test Metrics: Precision=0.9282, Recall=0.9282, F1=0.9282

Fine-tuning roberta-large (large) with Train Size 305, Split 3...


Map: 100%|██████████| 305/305 [00:00<00:00, 8542.09 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7888.16 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9187.12 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.18it/s, loss=0.119]


Epoch 1 Loss: 21.8702
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.19it/s, loss=0.0188]


Epoch 2 Loss: 9.6346
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.19it/s, loss=0.128] 


Epoch 3 Loss: 6.5688
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.18it/s, loss=0.0661]


Epoch 4 Loss: 4.3032
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:18<00:00,  2.16it/s, loss=0.0113]


Epoch 5 Loss: 3.1694
Test Metrics: Precision=0.9316, Recall=0.9316, F1=0.9316

Fine-tuning roberta-large (large) with Train Size 305, Split 4...


Map: 100%|██████████| 305/305 [00:00<00:00, 8296.56 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7161.72 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9253.11 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.13it/s, loss=0.462]


Epoch 1 Loss: 22.2511
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:18<00:00,  2.11it/s, loss=0.193]


Epoch 2 Loss: 10.8069
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:18<00:00,  2.07it/s, loss=0.0169]


Epoch 3 Loss: 6.8771
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:18<00:00,  2.12it/s, loss=0.00646]


Epoch 4 Loss: 5.2118
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:18<00:00,  2.13it/s, loss=0.00105]


Epoch 5 Loss: 3.8178
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 305, Split 5...


Map: 100%|██████████| 305/305 [00:00<00:00, 8567.66 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 8325.28 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9124.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:19<00:00,  2.04it/s, loss=0.014]


Epoch 1 Loss: 20.3828
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:19<00:00,  2.00it/s, loss=0.0537]


Epoch 2 Loss: 10.2395
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:19<00:00,  2.01it/s, loss=0.109]


Epoch 3 Loss: 7.3281
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:19<00:00,  2.01it/s, loss=0.0261]


Epoch 4 Loss: 4.9427
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:18<00:00,  2.06it/s, loss=0.00558]


Epoch 5 Loss: 3.6632
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 310, Split 1...


Map: 100%|██████████| 310/310 [00:00<00:00, 8107.66 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6990.32 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9145.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.24it/s, loss=0.545]


Epoch 1 Loss: 25.2345
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:16<00:00,  2.30it/s, loss=0.367]


Epoch 2 Loss: 11.2723
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.21it/s, loss=0.151] 


Epoch 3 Loss: 7.6912
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:18<00:00,  2.16it/s, loss=0.163] 


Epoch 4 Loss: 5.4359
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:17<00:00,  2.28it/s, loss=0.167] 


Epoch 5 Loss: 4.1639
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning roberta-large (large) with Train Size 310, Split 2...


Map: 100%|██████████| 310/310 [00:00<00:00, 8607.12 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6417.27 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9050.63 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.29it/s, loss=0.3]  


Epoch 1 Loss: 22.5795
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.23it/s, loss=0.345]


Epoch 2 Loss: 11.1100
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:17<00:00,  2.24it/s, loss=0.178] 


Epoch 3 Loss: 7.6203
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:16<00:00,  2.33it/s, loss=0.162] 


Epoch 4 Loss: 5.5696
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:17<00:00,  2.27it/s, loss=0.137] 


Epoch 5 Loss: 4.3071
Test Metrics: Precision=0.9285, Recall=0.9285, F1=0.9285

Fine-tuning roberta-large (large) with Train Size 310, Split 3...


Map: 100%|██████████| 310/310 [00:00<00:00, 8698.91 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6184.96 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9543.82 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:17<00:00,  2.19it/s, loss=0.553]


Epoch 1 Loss: 25.1682
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:17<00:00,  2.17it/s, loss=0.414]


Epoch 2 Loss: 14.6520
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:18<00:00,  2.12it/s, loss=0.185]


Epoch 3 Loss: 10.1810
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.18it/s, loss=0.264] 


Epoch 4 Loss: 7.9230
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:18<00:00,  2.15it/s, loss=0.142] 


Epoch 5 Loss: 6.7904
Test Metrics: Precision=0.9213, Recall=0.9213, F1=0.9213

Fine-tuning roberta-large (large) with Train Size 310, Split 4...


Map: 100%|██████████| 310/310 [00:00<00:00, 8937.12 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6363.71 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9115.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:18<00:00,  2.09it/s, loss=0.209]


Epoch 1 Loss: 22.8056
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:18<00:00,  2.15it/s, loss=0.279]


Epoch 2 Loss: 10.5722
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:18<00:00,  2.09it/s, loss=0.0962]


Epoch 3 Loss: 7.7718
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:17<00:00,  2.17it/s, loss=0.152] 


Epoch 4 Loss: 5.4745
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:18<00:00,  2.13it/s, loss=0.14]  


Epoch 5 Loss: 4.2123
Test Metrics: Precision=0.9304, Recall=0.9304, F1=0.9304

Fine-tuning roberta-large (large) with Train Size 310, Split 5...


Map: 100%|██████████| 310/310 [00:00<00:00, 7518.85 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 7285.45 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9195.62 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 39/39 [00:19<00:00,  1.99it/s, loss=0.593]


Epoch 1 Loss: 24.3565
Epoch 2/5


Training Epoch 2: 100%|██████████| 39/39 [00:19<00:00,  1.97it/s, loss=0.202]


Epoch 2 Loss: 10.8290
Epoch 3/5


Training Epoch 3: 100%|██████████| 39/39 [00:19<00:00,  2.02it/s, loss=0.0896]


Epoch 3 Loss: 7.2530
Epoch 4/5


Training Epoch 4: 100%|██████████| 39/39 [00:20<00:00,  1.94it/s, loss=0.0926]


Epoch 4 Loss: 5.1947
Epoch 5/5


Training Epoch 5: 100%|██████████| 39/39 [00:20<00:00,  1.94it/s, loss=0.0889]


Epoch 5 Loss: 3.9556
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 315, Split 1...


Map: 100%|██████████| 315/315 [00:00<00:00, 8608.77 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7221.08 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9335.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:17<00:00,  2.27it/s, loss=0.185]


Epoch 1 Loss: 22.6074
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:17<00:00,  2.29it/s, loss=0.207]


Epoch 2 Loss: 10.3490
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:17<00:00,  2.33it/s, loss=0.0984]


Epoch 3 Loss: 7.1056
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s, loss=0.0566]


Epoch 4 Loss: 4.9621
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:17<00:00,  2.28it/s, loss=0.12]  


Epoch 5 Loss: 3.4773
Test Metrics: Precision=0.9330, Recall=0.9330, F1=0.9330

Fine-tuning roberta-large (large) with Train Size 315, Split 2...


Map: 100%|██████████| 315/315 [00:00<00:00, 8868.05 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 6630.23 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9350.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:17<00:00,  2.25it/s, loss=0.298]


Epoch 1 Loss: 23.4992
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s, loss=0.245]


Epoch 2 Loss: 10.3995
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s, loss=0.107] 


Epoch 3 Loss: 7.3783
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s, loss=0.14]  


Epoch 4 Loss: 4.8737
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:17<00:00,  2.25it/s, loss=0.061] 


Epoch 5 Loss: 3.6161
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 315, Split 3...


Map: 100%|██████████| 315/315 [00:00<00:00, 8720.54 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 5552.92 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 4123.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s, loss=0.446]


Epoch 1 Loss: 21.8274
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.15it/s, loss=0.356] 


Epoch 2 Loss: 10.5422
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s, loss=0.195] 


Epoch 3 Loss: 6.7447
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s, loss=0.121] 


Epoch 4 Loss: 4.7489
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s, loss=0.109] 


Epoch 5 Loss: 3.4062
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 315, Split 4...


Map: 100%|██████████| 315/315 [00:00<00:00, 8797.25 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 6720.44 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9195.16 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.11it/s, loss=0.311]


Epoch 1 Loss: 22.1860
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.362]


Epoch 2 Loss: 10.5332
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:19<00:00,  2.10it/s, loss=0.0529]


Epoch 3 Loss: 6.5514
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:19<00:00,  2.09it/s, loss=0.039] 


Epoch 4 Loss: 4.6657
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:18<00:00,  2.16it/s, loss=0.157] 


Epoch 5 Loss: 4.1972
Test Metrics: Precision=0.9308, Recall=0.9308, F1=0.9308

Fine-tuning roberta-large (large) with Train Size 315, Split 5...


Map: 100%|██████████| 315/315 [00:00<00:00, 8476.11 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7293.03 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9275.31 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:19<00:00,  2.02it/s, loss=0.53] 


Epoch 1 Loss: 33.2035
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:19<00:00,  2.05it/s, loss=0.438]


Epoch 2 Loss: 16.0224
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:19<00:00,  2.03it/s, loss=0.275]


Epoch 3 Loss: 11.1275
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:19<00:00,  2.04it/s, loss=0.0929]


Epoch 4 Loss: 8.8789
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:19<00:00,  2.03it/s, loss=0.154] 


Epoch 5 Loss: 6.7591
Test Metrics: Precision=0.9271, Recall=0.9271, F1=0.9271

Fine-tuning roberta-large (large) with Train Size 320, Split 1...


Map: 100%|██████████| 320/320 [00:00<00:00, 8846.53 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6316.87 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9526.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.236]


Epoch 1 Loss: 21.4937
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.18it/s, loss=0.189]


Epoch 2 Loss: 9.9512
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.0936]


Epoch 3 Loss: 6.2400
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:17<00:00,  2.25it/s, loss=0.0862]


Epoch 4 Loss: 4.0574
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:17<00:00,  2.26it/s, loss=0.0544]


Epoch 5 Loss: 2.9349
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 320, Split 2...


Map: 100%|██████████| 320/320 [00:00<00:00, 8861.01 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7884.96 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9174.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:17<00:00,  2.32it/s, loss=0.349]


Epoch 1 Loss: 26.9363
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.267]


Epoch 2 Loss: 12.0238
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:17<00:00,  2.27it/s, loss=0.127] 


Epoch 3 Loss: 8.2334
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:17<00:00,  2.30it/s, loss=0.122] 


Epoch 4 Loss: 5.7894
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:18<00:00,  2.21it/s, loss=0.0938]


Epoch 5 Loss: 4.4680
Test Metrics: Precision=0.9317, Recall=0.9317, F1=0.9317

Fine-tuning roberta-large (large) with Train Size 320, Split 3...


Map: 100%|██████████| 320/320 [00:00<00:00, 8608.83 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7229.81 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9266.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:18<00:00,  2.18it/s, loss=0.329]


Epoch 1 Loss: 22.5582
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:18<00:00,  2.20it/s, loss=0.387]


Epoch 2 Loss: 18.2223
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:18<00:00,  2.17it/s, loss=0.272]


Epoch 3 Loss: 14.6762
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:18<00:00,  2.18it/s, loss=0.162]


Epoch 4 Loss: 8.5355
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:18<00:00,  2.22it/s, loss=0.116] 


Epoch 5 Loss: 7.2815
Test Metrics: Precision=0.9166, Recall=0.9166, F1=0.9166

Fine-tuning roberta-large (large) with Train Size 320, Split 4...


Map: 100%|██████████| 320/320 [00:00<00:00, 8096.87 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7218.14 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9051.46 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:19<00:00,  2.02it/s, loss=0.275]


Epoch 1 Loss: 21.1909
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:19<00:00,  2.06it/s, loss=0.259]


Epoch 2 Loss: 9.3482
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:19<00:00,  2.10it/s, loss=0.128] 


Epoch 3 Loss: 6.1155
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:19<00:00,  2.09it/s, loss=0.0688]


Epoch 4 Loss: 3.7869
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:19<00:00,  2.09it/s, loss=0.0451]


Epoch 5 Loss: 2.7337
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning roberta-large (large) with Train Size 320, Split 5...


Map: 100%|██████████| 320/320 [00:00<00:00, 8346.15 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6980.15 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9113.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 40/40 [00:20<00:00,  2.00it/s, loss=0.469]


Epoch 1 Loss: 24.1292
Epoch 2/5


Training Epoch 2: 100%|██████████| 40/40 [00:20<00:00,  1.94it/s, loss=0.284]


Epoch 2 Loss: 11.9631
Epoch 3/5


Training Epoch 3: 100%|██████████| 40/40 [00:19<00:00,  2.07it/s, loss=0.131] 


Epoch 3 Loss: 7.8006
Epoch 4/5


Training Epoch 4: 100%|██████████| 40/40 [00:20<00:00,  1.97it/s, loss=0.158] 


Epoch 4 Loss: 5.5091
Epoch 5/5


Training Epoch 5: 100%|██████████| 40/40 [00:20<00:00,  1.95it/s, loss=0.104] 


Epoch 5 Loss: 4.1976
Test Metrics: Precision=0.9336, Recall=0.9336, F1=0.9336

Fine-tuning roberta-large (large) with Train Size 325, Split 1...


Map: 100%|██████████| 325/325 [00:00<00:00, 8578.98 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6734.59 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9235.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 41/41 [00:17<00:00,  2.29it/s, loss=0.226]


Epoch 1 Loss: 20.5367
Epoch 2/5


Training Epoch 2: 100%|██████████| 41/41 [00:18<00:00,  2.24it/s, loss=0.26] 


Epoch 2 Loss: 10.7112
Epoch 3/5


Training Epoch 3: 100%|██████████| 41/41 [00:17<00:00,  2.30it/s, loss=0.125] 


Epoch 3 Loss: 6.9581
Epoch 4/5


Training Epoch 4: 100%|██████████| 41/41 [00:17<00:00,  2.34it/s, loss=0.159] 


Epoch 4 Loss: 4.7861
Epoch 5/5


Training Epoch 5: 100%|██████████| 41/41 [00:18<00:00,  2.21it/s, loss=0.0545]


Epoch 5 Loss: 3.4486
Test Metrics: Precision=0.9340, Recall=0.9340, F1=0.9340

Fine-tuning roberta-large (large) with Train Size 325, Split 2...


Map: 100%|██████████| 325/325 [00:00<00:00, 8713.89 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 7698.14 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9104.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 41/41 [00:18<00:00,  2.20it/s, loss=0.356]


Epoch 1 Loss: 23.9020
Epoch 2/5


Training Epoch 2: 100%|██████████| 41/41 [00:17<00:00,  2.29it/s, loss=0.115]


Epoch 2 Loss: 10.5769
Epoch 3/5


Training Epoch 3: 100%|██████████| 41/41 [00:18<00:00,  2.27it/s, loss=0.289] 


Epoch 3 Loss: 6.8676
Epoch 4/5


Training Epoch 4: 100%|██████████| 41/41 [00:17<00:00,  2.33it/s, loss=0.167] 


Epoch 4 Loss: 4.4992
Epoch 5/5


Training Epoch 5: 100%|██████████| 41/41 [00:18<00:00,  2.20it/s, loss=0.0251]


Epoch 5 Loss: 3.1972
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 325, Split 3...


Map: 100%|██████████| 325/325 [00:00<00:00, 8362.98 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6297.32 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9267.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 41/41 [00:18<00:00,  2.24it/s, loss=0.342]


Epoch 1 Loss: 21.1487
Epoch 2/5


Training Epoch 2: 100%|██████████| 41/41 [00:18<00:00,  2.22it/s, loss=0.152]


Epoch 2 Loss: 10.2225
Epoch 3/5


Training Epoch 3: 100%|██████████| 41/41 [00:18<00:00,  2.25it/s, loss=0.218] 


Epoch 3 Loss: 6.9193
Epoch 4/5


Training Epoch 4: 100%|██████████| 41/41 [00:18<00:00,  2.18it/s, loss=0.119] 


Epoch 4 Loss: 4.8560
Epoch 5/5


Training Epoch 5: 100%|██████████| 41/41 [00:18<00:00,  2.16it/s, loss=0.0765]


Epoch 5 Loss: 3.4176
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 325, Split 4...


Map: 100%|██████████| 325/325 [00:00<00:00, 8492.40 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 8211.99 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9046.63 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 41/41 [00:18<00:00,  2.18it/s, loss=0.419]


Epoch 1 Loss: 23.3591
Epoch 2/5


Training Epoch 2: 100%|██████████| 41/41 [00:18<00:00,  2.16it/s, loss=0.3]  


Epoch 2 Loss: 10.1406
Epoch 3/5


Training Epoch 3: 100%|██████████| 41/41 [00:19<00:00,  2.14it/s, loss=0.188] 


Epoch 3 Loss: 7.0904
Epoch 4/5


Training Epoch 4: 100%|██████████| 41/41 [00:18<00:00,  2.23it/s, loss=0.0641]


Epoch 4 Loss: 4.8156
Epoch 5/5


Training Epoch 5: 100%|██████████| 41/41 [00:19<00:00,  2.13it/s, loss=0.0904]


Epoch 5 Loss: 3.4790
Test Metrics: Precision=0.9301, Recall=0.9301, F1=0.9301

Fine-tuning roberta-large (large) with Train Size 325, Split 5...


Map: 100%|██████████| 325/325 [00:00<00:00, 8630.09 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 5848.04 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9474.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 41/41 [00:19<00:00,  2.08it/s, loss=0.452]


Epoch 1 Loss: 26.1716
Epoch 2/5


Training Epoch 2: 100%|██████████| 41/41 [00:19<00:00,  2.09it/s, loss=0.318]


Epoch 2 Loss: 11.3393
Epoch 3/5


Training Epoch 3: 100%|██████████| 41/41 [00:19<00:00,  2.05it/s, loss=0.089]


Epoch 3 Loss: 7.9427
Epoch 4/5


Training Epoch 4: 100%|██████████| 41/41 [00:19<00:00,  2.10it/s, loss=0.0282]


Epoch 4 Loss: 5.3530
Epoch 5/5


Training Epoch 5: 100%|██████████| 41/41 [00:20<00:00,  2.02it/s, loss=0.0875]


Epoch 5 Loss: 4.1814
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 330, Split 1...


Map: 100%|██████████| 330/330 [00:00<00:00, 8517.61 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7822.10 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9073.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:18<00:00,  2.27it/s, loss=0.372]


Epoch 1 Loss: 21.7401
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:18<00:00,  2.22it/s, loss=0.434]


Epoch 2 Loss: 10.8967
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.119]


Epoch 3 Loss: 7.4502
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.19it/s, loss=0.067] 


Epoch 4 Loss: 5.0160
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:18<00:00,  2.28it/s, loss=0.0457]


Epoch 5 Loss: 3.5587
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 330, Split 2...


Map: 100%|██████████| 330/330 [00:00<00:00, 8288.99 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7200.90 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 8995.17 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:18<00:00,  2.21it/s, loss=0.435]


Epoch 1 Loss: 22.9491
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.163]


Epoch 2 Loss: 10.9050
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.157]


Epoch 3 Loss: 7.4294
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:18<00:00,  2.22it/s, loss=0.107] 


Epoch 4 Loss: 5.0628
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:18<00:00,  2.30it/s, loss=0.0479]


Epoch 5 Loss: 3.4874
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 330, Split 3...


Map: 100%|██████████| 330/330 [00:00<00:00, 8962.54 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 6713.16 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9135.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.20it/s, loss=0.219]


Epoch 1 Loss: 22.0725
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.18it/s, loss=0.241]


Epoch 2 Loss: 10.5262
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.25it/s, loss=0.168] 


Epoch 3 Loss: 6.6545
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.21it/s, loss=0.0521]


Epoch 4 Loss: 4.9688
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:19<00:00,  2.21it/s, loss=0.0502]


Epoch 5 Loss: 3.5285
Test Metrics: Precision=0.9358, Recall=0.9358, F1=0.9358

Fine-tuning roberta-large (large) with Train Size 330, Split 4...


Map: 100%|██████████| 330/330 [00:00<00:00, 8692.69 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7848.71 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 8835.77 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.14it/s, loss=0.187]


Epoch 1 Loss: 22.6351
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.14it/s, loss=0.154]


Epoch 2 Loss: 10.3853
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.29it/s, loss=0.101]


Epoch 3 Loss: 9.1012
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.13it/s, loss=0.0391]


Epoch 4 Loss: 6.1777
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:19<00:00,  2.16it/s, loss=0.574] 


Epoch 5 Loss: 5.0948
Test Metrics: Precision=0.9298, Recall=0.9298, F1=0.9298

Fine-tuning roberta-large (large) with Train Size 330, Split 5...


Map: 100%|██████████| 330/330 [00:00<00:00, 7970.43 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 6726.05 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9215.45 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:21<00:00,  1.99it/s, loss=0.23] 


Epoch 1 Loss: 27.3080
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:21<00:00,  1.97it/s, loss=0.317]


Epoch 2 Loss: 12.3615
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:20<00:00,  2.02it/s, loss=0.143]


Epoch 3 Loss: 9.0454
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:20<00:00,  2.05it/s, loss=0.373] 


Epoch 4 Loss: 7.0127
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:21<00:00,  1.96it/s, loss=0.0609]


Epoch 5 Loss: 4.9635
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning roberta-large (large) with Train Size 335, Split 1...


Map: 100%|██████████| 335/335 [00:00<00:00, 8581.29 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7153.14 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9270.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:18<00:00,  2.22it/s, loss=0.342]


Epoch 1 Loss: 24.1982
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.16it/s, loss=0.222]


Epoch 2 Loss: 11.1414
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.24it/s, loss=0.124] 


Epoch 3 Loss: 7.3293
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:18<00:00,  2.24it/s, loss=0.162] 


Epoch 4 Loss: 5.2628
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:18<00:00,  2.24it/s, loss=0.0969]


Epoch 5 Loss: 3.9053
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 335, Split 2...


Map: 100%|██████████| 335/335 [00:00<00:00, 8460.48 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 6622.17 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 8878.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:18<00:00,  2.22it/s, loss=0.267]


Epoch 1 Loss: 25.6754
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:18<00:00,  2.24it/s, loss=0.156]


Epoch 2 Loss: 11.2739
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:18<00:00,  2.26it/s, loss=0.0786]


Epoch 3 Loss: 7.7807
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:18<00:00,  2.23it/s, loss=0.0829]


Epoch 4 Loss: 5.5062
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:18<00:00,  2.28it/s, loss=0.0776]


Epoch 5 Loss: 4.0273
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 335, Split 3...


Map: 100%|██████████| 335/335 [00:00<00:00, 8822.57 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7522.91 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9278.80 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.16it/s, loss=0.203]


Epoch 1 Loss: 22.0612
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:18<00:00,  2.22it/s, loss=0.293] 


Epoch 2 Loss: 10.4014
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.21it/s, loss=0.151] 


Epoch 3 Loss: 7.3457
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.17it/s, loss=0.069] 


Epoch 4 Loss: 4.7686
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:19<00:00,  2.17it/s, loss=0.0476]


Epoch 5 Loss: 3.3811
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 335, Split 4...


Map: 100%|██████████| 335/335 [00:00<00:00, 8455.24 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7225.61 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9169.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:19<00:00,  2.15it/s, loss=0.316]


Epoch 1 Loss: 22.4316
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:19<00:00,  2.20it/s, loss=0.148]


Epoch 2 Loss: 10.8059
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:19<00:00,  2.16it/s, loss=0.228] 


Epoch 3 Loss: 7.5477
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:19<00:00,  2.19it/s, loss=0.137] 


Epoch 4 Loss: 5.1558
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:20<00:00,  2.10it/s, loss=0.128] 


Epoch 5 Loss: 3.8578
Test Metrics: Precision=0.9298, Recall=0.9298, F1=0.9298

Fine-tuning roberta-large (large) with Train Size 335, Split 5...


Map: 100%|██████████| 335/335 [00:00<00:00, 8580.98 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7371.55 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9245.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 42/42 [00:21<00:00,  1.94it/s, loss=0.268]


Epoch 1 Loss: 22.9810
Epoch 2/5


Training Epoch 2: 100%|██████████| 42/42 [00:21<00:00,  1.96it/s, loss=0.197]


Epoch 2 Loss: 11.5533
Epoch 3/5


Training Epoch 3: 100%|██████████| 42/42 [00:21<00:00,  1.94it/s, loss=0.213] 


Epoch 3 Loss: 7.8657
Epoch 4/5


Training Epoch 4: 100%|██████████| 42/42 [00:20<00:00,  2.01it/s, loss=0.177] 


Epoch 4 Loss: 5.5960
Epoch 5/5


Training Epoch 5: 100%|██████████| 42/42 [00:21<00:00,  1.99it/s, loss=0.043] 


Epoch 5 Loss: 4.0565
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 340, Split 1...


Map: 100%|██████████| 340/340 [00:00<00:00, 8607.02 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7101.73 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9107.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.25it/s, loss=0.341]


Epoch 1 Loss: 24.1643
Epoch 2/5


Training Epoch 2: 100%|██████████| 43/43 [00:18<00:00,  2.27it/s, loss=0.19] 


Epoch 2 Loss: 11.3622
Epoch 3/5


Training Epoch 3: 100%|██████████| 43/43 [00:19<00:00,  2.25it/s, loss=0.23]  


Epoch 3 Loss: 7.1698
Epoch 4/5


Training Epoch 4: 100%|██████████| 43/43 [00:19<00:00,  2.23it/s, loss=0.0393]


Epoch 4 Loss: 4.6379
Epoch 5/5


Training Epoch 5: 100%|██████████| 43/43 [00:18<00:00,  2.27it/s, loss=0.0901]


Epoch 5 Loss: 3.2902
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 340, Split 2...


Map: 100%|██████████| 340/340 [00:00<00:00, 8714.27 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7123.91 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9073.09 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.24it/s, loss=0.572]


Epoch 1 Loss: 23.6159
Epoch 2/5


Training Epoch 2: 100%|██████████| 43/43 [00:18<00:00,  2.27it/s, loss=0.217]


Epoch 2 Loss: 12.1587
Epoch 3/5


Training Epoch 3: 100%|██████████| 43/43 [00:19<00:00,  2.25it/s, loss=0.171] 


Epoch 3 Loss: 8.3707
Epoch 4/5


Training Epoch 4: 100%|██████████| 43/43 [00:19<00:00,  2.22it/s, loss=0.0846]


Epoch 4 Loss: 5.6850
Epoch 5/5


Training Epoch 5: 100%|██████████| 43/43 [00:19<00:00,  2.21it/s, loss=0.0503]


Epoch 5 Loss: 4.1065
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning roberta-large (large) with Train Size 340, Split 3...


Map: 100%|██████████| 340/340 [00:00<00:00, 8759.66 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7262.86 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9131.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.20it/s, loss=0.295]


Epoch 1 Loss: 22.8980
Epoch 2/5


Training Epoch 2: 100%|██████████| 43/43 [00:19<00:00,  2.22it/s, loss=0.0779]


Epoch 2 Loss: 10.7432
Epoch 3/5


Training Epoch 3: 100%|██████████| 43/43 [00:19<00:00,  2.18it/s, loss=0.33]  


Epoch 3 Loss: 7.3907
Epoch 4/5


Training Epoch 4: 100%|██████████| 43/43 [00:19<00:00,  2.20it/s, loss=0.203] 


Epoch 4 Loss: 5.3659
Epoch 5/5


Training Epoch 5: 100%|██████████| 43/43 [00:18<00:00,  2.27it/s, loss=0.092] 


Epoch 5 Loss: 4.0793
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 340, Split 4...


Map: 100%|██████████| 340/340 [00:00<00:00, 8896.88 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6585.53 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9134.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 43/43 [00:19<00:00,  2.17it/s, loss=0.211]


Epoch 1 Loss: 21.3830
Epoch 2/5


Training Epoch 2: 100%|██████████| 43/43 [00:20<00:00,  2.12it/s, loss=0.453]


Epoch 2 Loss: 10.0802
Epoch 3/5


Training Epoch 3: 100%|██████████| 43/43 [00:20<00:00,  2.12it/s, loss=0.2]   


Epoch 3 Loss: 6.5946
Epoch 4/5


Training Epoch 4: 100%|██████████| 43/43 [00:20<00:00,  2.12it/s, loss=0.163] 


Epoch 4 Loss: 4.5911
Epoch 5/5


Training Epoch 5: 100%|██████████| 43/43 [00:19<00:00,  2.18it/s, loss=0.0446]


Epoch 5 Loss: 3.2861
Test Metrics: Precision=0.9336, Recall=0.9336, F1=0.9336

Fine-tuning roberta-large (large) with Train Size 340, Split 5...


Map: 100%|██████████| 340/340 [00:00<00:00, 7393.45 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 8101.48 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9136.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 43/43 [00:21<00:00,  2.00it/s, loss=0.308]


Epoch 1 Loss: 23.6443
Epoch 2/5


Training Epoch 2: 100%|██████████| 43/43 [00:21<00:00,  2.00it/s, loss=0.15] 


Epoch 2 Loss: 11.3224
Epoch 3/5


Training Epoch 3: 100%|██████████| 43/43 [00:20<00:00,  2.06it/s, loss=0.268] 


Epoch 3 Loss: 7.5186
Epoch 4/5


Training Epoch 4: 100%|██████████| 43/43 [00:20<00:00,  2.09it/s, loss=0.0699]


Epoch 4 Loss: 5.3241
Epoch 5/5


Training Epoch 5: 100%|██████████| 43/43 [00:21<00:00,  2.00it/s, loss=0.0747]


Epoch 5 Loss: 3.6828
Test Metrics: Precision=0.9336, Recall=0.9336, F1=0.9336

Fine-tuning roberta-large (large) with Train Size 345, Split 1...


Map: 100%|██████████| 345/345 [00:00<00:00, 8917.95 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6637.93 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9142.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.30it/s, loss=0.528]


Epoch 1 Loss: 22.7318
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.26it/s, loss=0.148]


Epoch 2 Loss: 11.0936
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:18<00:00,  2.32it/s, loss=0.0296]


Epoch 3 Loss: 7.6433
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:19<00:00,  2.30it/s, loss=0.0384]


Epoch 4 Loss: 5.3252
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:18<00:00,  2.34it/s, loss=0.0138]


Epoch 5 Loss: 4.0912
Test Metrics: Precision=0.9321, Recall=0.9321, F1=0.9321

Fine-tuning roberta-large (large) with Train Size 345, Split 2...


Map: 100%|██████████| 345/345 [00:00<00:00, 9059.82 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 5868.06 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9381.22 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.29it/s, loss=0.369]


Epoch 1 Loss: 23.8422
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.25it/s, loss=0.482]


Epoch 2 Loss: 15.0517
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.26it/s, loss=0.188]


Epoch 3 Loss: 10.5299
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:19<00:00,  2.23it/s, loss=0.0322]


Epoch 4 Loss: 7.5097
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:20<00:00,  2.19it/s, loss=0.198] 


Epoch 5 Loss: 5.9821
Test Metrics: Precision=0.9311, Recall=0.9311, F1=0.9311

Fine-tuning roberta-large (large) with Train Size 345, Split 3...


Map: 100%|██████████| 345/345 [00:00<00:00, 8707.79 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6836.12 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 3981.55 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.207]


Epoch 1 Loss: 23.0218
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.0702]


Epoch 2 Loss: 11.2147
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.375]


Epoch 3 Loss: 7.4532
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.0358]


Epoch 4 Loss: 5.2606
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:19<00:00,  2.22it/s, loss=0.0278]


Epoch 5 Loss: 3.5045
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 345, Split 4...


Map: 100%|██████████| 345/345 [00:00<00:00, 9003.84 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 7530.36 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 8842.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.23it/s, loss=0.241]


Epoch 1 Loss: 23.9965
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.28it/s, loss=0.154]


Epoch 2 Loss: 10.8698
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.20it/s, loss=0.0941]


Epoch 3 Loss: 7.3621
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.16it/s, loss=0.163] 


Epoch 4 Loss: 5.3355
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:21<00:00,  2.09it/s, loss=0.352] 


Epoch 5 Loss: 4.2485
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 345, Split 5...


Map: 100%|██████████| 345/345 [00:00<00:00, 8231.05 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 7557.70 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9147.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:21<00:00,  2.01it/s, loss=0.369]


Epoch 1 Loss: 23.6277
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:20<00:00,  2.12it/s, loss=0.496]


Epoch 2 Loss: 13.4721
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:21<00:00,  2.01it/s, loss=0.167] 


Epoch 3 Loss: 9.2691
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:21<00:00,  2.09it/s, loss=0.397] 


Epoch 4 Loss: 7.4575
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:21<00:00,  2.02it/s, loss=0.094] 


Epoch 5 Loss: 5.4891
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 350, Split 1...


Map: 100%|██████████| 350/350 [00:00<00:00, 8829.26 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 6997.34 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9214.24 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.28it/s, loss=0.479]


Epoch 1 Loss: 25.3434
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.22it/s, loss=0.247]


Epoch 2 Loss: 11.4536
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.27it/s, loss=0.17]  


Epoch 3 Loss: 7.5330
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.19it/s, loss=0.092] 


Epoch 4 Loss: 5.0251
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:19<00:00,  2.26it/s, loss=0.0924]


Epoch 5 Loss: 3.5199
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning roberta-large (large) with Train Size 350, Split 2...


Map: 100%|██████████| 350/350 [00:00<00:00, 8707.71 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7315.71 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 8985.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:19<00:00,  2.28it/s, loss=0.405]


Epoch 1 Loss: 22.8986
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:20<00:00,  2.15it/s, loss=0.267] 


Epoch 2 Loss: 10.7531
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.16it/s, loss=0.0566]


Epoch 3 Loss: 7.9095
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:19<00:00,  2.25it/s, loss=0.0321]


Epoch 4 Loss: 5.2268
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:19<00:00,  2.23it/s, loss=0.118] 


Epoch 5 Loss: 4.0694
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 350, Split 3...


Map: 100%|██████████| 350/350 [00:00<00:00, 8787.09 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7507.06 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 8952.18 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:20<00:00,  2.17it/s, loss=0.396]


Epoch 1 Loss: 23.2045
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:19<00:00,  2.23it/s, loss=0.154]


Epoch 2 Loss: 10.6428
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:19<00:00,  2.21it/s, loss=0.208] 


Epoch 3 Loss: 6.9322
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.161] 


Epoch 4 Loss: 4.6515
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:19<00:00,  2.24it/s, loss=0.0542]


Epoch 5 Loss: 3.2978
Test Metrics: Precision=0.9369, Recall=0.9369, F1=0.9369

Fine-tuning roberta-large (large) with Train Size 350, Split 4...


Map: 100%|██████████| 350/350 [00:00<00:00, 8728.94 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7293.35 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9101.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:20<00:00,  2.12it/s, loss=0.199]


Epoch 1 Loss: 22.2594
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:20<00:00,  2.11it/s, loss=0.139]


Epoch 2 Loss: 10.1738
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:20<00:00,  2.15it/s, loss=0.153] 


Epoch 3 Loss: 7.3519
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:20<00:00,  2.14it/s, loss=0.0873]


Epoch 4 Loss: 4.8558
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:19<00:00,  2.21it/s, loss=0.0585]


Epoch 5 Loss: 3.6550
Test Metrics: Precision=0.9323, Recall=0.9323, F1=0.9323

Fine-tuning roberta-large (large) with Train Size 350, Split 5...


Map: 100%|██████████| 350/350 [00:00<00:00, 8763.85 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7306.42 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9126.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 44/44 [00:21<00:00,  2.05it/s, loss=0.458]


Epoch 1 Loss: 23.6615
Epoch 2/5


Training Epoch 2: 100%|██████████| 44/44 [00:22<00:00,  1.97it/s, loss=0.216]


Epoch 2 Loss: 11.9395
Epoch 3/5


Training Epoch 3: 100%|██████████| 44/44 [00:21<00:00,  2.05it/s, loss=0.223] 


Epoch 3 Loss: 7.8904
Epoch 4/5


Training Epoch 4: 100%|██████████| 44/44 [00:22<00:00,  1.96it/s, loss=0.0548]


Epoch 4 Loss: 5.2493
Epoch 5/5


Training Epoch 5: 100%|██████████| 44/44 [00:20<00:00,  2.11it/s, loss=0.0264]


Epoch 5 Loss: 3.6713
Test Metrics: Precision=0.9369, Recall=0.9369, F1=0.9369

Fine-tuning roberta-large (large) with Train Size 355, Split 1...


Map: 100%|██████████| 355/355 [00:00<00:00, 8627.75 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6291.77 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9330.84 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:19<00:00,  2.27it/s, loss=0.763]


Epoch 1 Loss: 28.0448
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:19<00:00,  2.27it/s, loss=0.393]


Epoch 2 Loss: 16.3846
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:19<00:00,  2.31it/s, loss=0.168]


Epoch 3 Loss: 11.2160
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.22it/s, loss=0.265] 


Epoch 4 Loss: 7.4462
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:20<00:00,  2.22it/s, loss=0.0921]


Epoch 5 Loss: 6.1088
Test Metrics: Precision=0.9298, Recall=0.9298, F1=0.9298

Fine-tuning roberta-large (large) with Train Size 355, Split 2...


Map: 100%|██████████| 355/355 [00:00<00:00, 9079.63 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 7419.29 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9134.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.14it/s, loss=0.282]


Epoch 1 Loss: 23.7504
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.236]


Epoch 2 Loss: 11.6636
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.14it/s, loss=0.242] 


Epoch 3 Loss: 7.8297
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.0967]


Epoch 4 Loss: 5.6452
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.112] 


Epoch 5 Loss: 4.0467
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning roberta-large (large) with Train Size 355, Split 3...


Map: 100%|██████████| 355/355 [00:00<00:00, 8340.49 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 8167.06 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9054.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.21it/s, loss=0.298]


Epoch 1 Loss: 24.1796
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.201]


Epoch 2 Loss: 11.0466
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.19it/s, loss=0.11]  


Epoch 3 Loss: 7.6619
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.0557]


Epoch 4 Loss: 5.1632
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.0499]


Epoch 5 Loss: 3.6252
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 355, Split 4...


Map: 100%|██████████| 355/355 [00:00<00:00, 8891.07 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 7809.39 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 8986.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.25it/s, loss=0.273]


Epoch 1 Loss: 25.2704
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.19it/s, loss=0.232]


Epoch 2 Loss: 11.9965
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.133]


Epoch 3 Loss: 8.2448
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.0989]


Epoch 4 Loss: 5.9340
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:20<00:00,  2.25it/s, loss=0.117] 


Epoch 5 Loss: 5.2282
Test Metrics: Precision=0.9269, Recall=0.9269, F1=0.9269

Fine-tuning roberta-large (large) with Train Size 355, Split 5...


Map: 100%|██████████| 355/355 [00:00<00:00, 8466.93 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6714.06 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9315.89 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.01it/s, loss=0.258]


Epoch 1 Loss: 25.2427
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.08it/s, loss=0.129]


Epoch 2 Loss: 11.0232
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  1.98it/s, loss=0.031] 


Epoch 3 Loss: 7.4800
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:22<00:00,  2.03it/s, loss=0.206] 


Epoch 4 Loss: 5.3774
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:21<00:00,  2.07it/s, loss=0.216] 


Epoch 5 Loss: 3.6176
Test Metrics: Precision=0.9370, Recall=0.9370, F1=0.9370

Fine-tuning roberta-large (large) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 8902.85 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6952.85 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9261.96 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.20it/s, loss=0.219]


Epoch 1 Loss: 22.9110
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.168]


Epoch 2 Loss: 10.6932
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:19<00:00,  2.29it/s, loss=0.0925]


Epoch 3 Loss: 7.5824
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:20<00:00,  2.20it/s, loss=0.101] 


Epoch 4 Loss: 4.9776
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:20<00:00,  2.20it/s, loss=0.0743]


Epoch 5 Loss: 3.4608
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 9123.67 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6952.69 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9260.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.03it/s, loss=0.322]


Epoch 1 Loss: 24.6496
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.181]


Epoch 2 Loss: 11.0465
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.157] 


Epoch 3 Loss: 7.1560
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.11it/s, loss=0.0472]


Epoch 4 Loss: 5.0227
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:21<00:00,  2.11it/s, loss=0.0542]


Epoch 5 Loss: 3.5139
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 8999.95 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7626.39 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9208.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.326]


Epoch 1 Loss: 24.8143
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.256]


Epoch 2 Loss: 11.9519
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.118]


Epoch 3 Loss: 7.5372
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.0848]


Epoch 4 Loss: 5.5222
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.0829]


Epoch 5 Loss: 4.0624
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 8094.07 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7120.39 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9064.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.513]


Epoch 1 Loss: 26.0913
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.294]


Epoch 2 Loss: 11.1657
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.143] 


Epoch 3 Loss: 7.7580
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:21<00:00,  2.09it/s, loss=0.0781]


Epoch 4 Loss: 5.5648
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:21<00:00,  2.08it/s, loss=0.0396]


Epoch 5 Loss: 4.1677
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 8594.30 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7024.82 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9201.20 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.02it/s, loss=0.214]


Epoch 1 Loss: 28.5551
Epoch 2/5


Training Epoch 2: 100%|██████████| 45/45 [00:23<00:00,  1.95it/s, loss=0.339]


Epoch 2 Loss: 11.6181
Epoch 3/5


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  1.97it/s, loss=0.079] 


Epoch 3 Loss: 7.4607
Epoch 4/5


Training Epoch 4: 100%|██████████| 45/45 [00:22<00:00,  2.00it/s, loss=0.0961]


Epoch 4 Loss: 5.2105
Epoch 5/5


Training Epoch 5: 100%|██████████| 45/45 [00:21<00:00,  2.10it/s, loss=0.0668]


Epoch 5 Loss: 4.2268
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 365, Split 1...


Map: 100%|██████████| 365/365 [00:00<00:00, 8842.81 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6967.28 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9364.22 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 46/46 [00:20<00:00,  2.29it/s, loss=0.444]


Epoch 1 Loss: 24.3021
Epoch 2/5


Training Epoch 2: 100%|██████████| 46/46 [00:19<00:00,  2.30it/s, loss=0.212]


Epoch 2 Loss: 11.4347
Epoch 3/5


Training Epoch 3: 100%|██████████| 46/46 [00:19<00:00,  2.33it/s, loss=0.262] 


Epoch 3 Loss: 7.3697
Epoch 4/5


Training Epoch 4: 100%|██████████| 46/46 [00:20<00:00,  2.26it/s, loss=0.1]   


Epoch 4 Loss: 5.6520
Epoch 5/5


Training Epoch 5: 100%|██████████| 46/46 [00:20<00:00,  2.25it/s, loss=0.0669]


Epoch 5 Loss: 4.1334
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning roberta-large (large) with Train Size 365, Split 2...


Map: 100%|██████████| 365/365 [00:00<00:00, 9112.25 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6121.48 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 8694.79 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 46/46 [00:21<00:00,  2.16it/s, loss=0.257]


Epoch 1 Loss: 24.7482
Epoch 2/5


Training Epoch 2: 100%|██████████| 46/46 [00:21<00:00,  2.16it/s, loss=0.176]


Epoch 2 Loss: 11.8908
Epoch 3/5


Training Epoch 3: 100%|██████████| 46/46 [00:21<00:00,  2.16it/s, loss=0.187] 


Epoch 3 Loss: 7.8133
Epoch 4/5


Training Epoch 4: 100%|██████████| 46/46 [00:21<00:00,  2.11it/s, loss=0.163] 


Epoch 4 Loss: 5.4660
Epoch 5/5


Training Epoch 5: 100%|██████████| 46/46 [00:21<00:00,  2.14it/s, loss=0.0588]


Epoch 5 Loss: 3.7301
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 365, Split 3...


Map: 100%|██████████| 365/365 [00:00<00:00, 8587.32 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 7588.21 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9504.72 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 46/46 [00:21<00:00,  2.13it/s, loss=0.195]


Epoch 1 Loss: 23.7396
Epoch 2/5


Training Epoch 2: 100%|██████████| 46/46 [00:21<00:00,  2.14it/s, loss=0.212]


Epoch 2 Loss: 11.4262
Epoch 3/5


Training Epoch 3: 100%|██████████| 46/46 [00:21<00:00,  2.17it/s, loss=0.135] 


Epoch 3 Loss: 7.3166
Epoch 4/5


Training Epoch 4: 100%|██████████| 46/46 [00:21<00:00,  2.16it/s, loss=0.0621]


Epoch 4 Loss: 4.9677
Epoch 5/5


Training Epoch 5: 100%|██████████| 46/46 [00:21<00:00,  2.14it/s, loss=0.0317]


Epoch 5 Loss: 3.5279
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 365, Split 4...


Map: 100%|██████████| 365/365 [00:00<00:00, 8826.24 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6503.49 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9107.15 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 46/46 [00:20<00:00,  2.28it/s, loss=0.281]


Epoch 1 Loss: 21.4918
Epoch 2/5


Training Epoch 2: 100%|██████████| 46/46 [00:21<00:00,  2.13it/s, loss=0.267]


Epoch 2 Loss: 10.2400
Epoch 3/5


Training Epoch 3: 100%|██████████| 46/46 [00:20<00:00,  2.20it/s, loss=0.0905]


Epoch 3 Loss: 6.5470
Epoch 4/5


Training Epoch 4: 100%|██████████| 46/46 [00:21<00:00,  2.14it/s, loss=0.15]  


Epoch 4 Loss: 4.5859
Epoch 5/5


Training Epoch 5: 100%|██████████| 46/46 [00:21<00:00,  2.15it/s, loss=0.0805]


Epoch 5 Loss: 3.3011
Test Metrics: Precision=0.9343, Recall=0.9343, F1=0.9343

Fine-tuning roberta-large (large) with Train Size 365, Split 5...


Map: 100%|██████████| 365/365 [00:00<00:00, 8753.12 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6846.85 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9038.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 46/46 [00:22<00:00,  2.03it/s, loss=0.405]


Epoch 1 Loss: 24.0059
Epoch 2/5


Training Epoch 2: 100%|██████████| 46/46 [00:22<00:00,  2.01it/s, loss=0.215]


Epoch 2 Loss: 11.9806
Epoch 3/5


Training Epoch 3: 100%|██████████| 46/46 [00:23<00:00,  2.00it/s, loss=0.209] 


Epoch 3 Loss: 7.5364
Epoch 4/5


Training Epoch 4: 100%|██████████| 46/46 [00:21<00:00,  2.10it/s, loss=0.141] 


Epoch 4 Loss: 5.2626
Epoch 5/5


Training Epoch 5: 100%|██████████| 46/46 [00:22<00:00,  2.06it/s, loss=0.158] 


Epoch 5 Loss: 3.5633
Test Metrics: Precision=0.9348, Recall=0.9348, F1=0.9348

Fine-tuning roberta-large (large) with Train Size 370, Split 1...


Map: 100%|██████████| 370/370 [00:00<00:00, 8638.42 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6501.30 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 8997.46 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:19<00:00,  2.38it/s, loss=0.241]


Epoch 1 Loss: 26.2242
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:20<00:00,  2.25it/s, loss=0.223]


Epoch 2 Loss: 12.4858
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:20<00:00,  2.27it/s, loss=0.179] 


Epoch 3 Loss: 8.0622
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:20<00:00,  2.24it/s, loss=0.0795]


Epoch 4 Loss: 5.5407
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:20<00:00,  2.26it/s, loss=0.0148]


Epoch 5 Loss: 4.2227
Test Metrics: Precision=0.9353, Recall=0.9353, F1=0.9353

Fine-tuning roberta-large (large) with Train Size 370, Split 2...


Map: 100%|██████████| 370/370 [00:00<00:00, 8985.64 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6941.26 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9141.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s, loss=0.258]


Epoch 1 Loss: 24.7031
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.16it/s, loss=0.137]


Epoch 2 Loss: 10.8046
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.197] 


Epoch 3 Loss: 7.2729
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.147] 


Epoch 4 Loss: 5.2862
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s, loss=0.056] 


Epoch 5 Loss: 3.7172
Test Metrics: Precision=0.9337, Recall=0.9337, F1=0.9337

Fine-tuning roberta-large (large) with Train Size 370, Split 3...


Map: 100%|██████████| 370/370 [00:00<00:00, 8564.01 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 8217.59 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9002.60 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.626]


Epoch 1 Loss: 24.9041
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s, loss=0.103]


Epoch 2 Loss: 11.1589
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.19it/s, loss=0.0641]


Epoch 3 Loss: 6.9446
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.13it/s, loss=0.125] 


Epoch 4 Loss: 4.8729
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:21<00:00,  2.17it/s, loss=0.138] 


Epoch 5 Loss: 3.7820
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning roberta-large (large) with Train Size 370, Split 4...


Map: 100%|██████████| 370/370 [00:00<00:00, 9094.22 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 7119.10 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 8957.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.20it/s, loss=0.184]


Epoch 1 Loss: 23.8452
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.23it/s, loss=0.277]


Epoch 2 Loss: 10.8914
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.16it/s, loss=0.161] 


Epoch 3 Loss: 7.4860
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:20<00:00,  2.26it/s, loss=0.0255]


Epoch 4 Loss: 5.2266
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:21<00:00,  2.18it/s, loss=0.089] 


Epoch 5 Loss: 3.9508
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 370, Split 5...


Map: 100%|██████████| 370/370 [00:00<00:00, 8633.86 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6707.70 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9289.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.06it/s, loss=0.211]


Epoch 1 Loss: 24.8653
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.09it/s, loss=0.297]


Epoch 2 Loss: 11.9125
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.10it/s, loss=0.0464]


Epoch 3 Loss: 7.6830
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:23<00:00,  2.03it/s, loss=0.072] 


Epoch 4 Loss: 4.9487
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:22<00:00,  2.08it/s, loss=0.0697]


Epoch 5 Loss: 3.5594
Test Metrics: Precision=0.9370, Recall=0.9370, F1=0.9370

Fine-tuning roberta-large (large) with Train Size 375, Split 1...


Map: 100%|██████████| 375/375 [00:00<00:00, 9000.55 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7856.66 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9116.98 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.19it/s, loss=0.354]


Epoch 1 Loss: 24.5326
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.20it/s, loss=0.297] 


Epoch 2 Loss: 12.4225
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.18it/s, loss=0.146] 


Epoch 3 Loss: 8.1844
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:21<00:00,  2.21it/s, loss=0.15]  


Epoch 4 Loss: 5.7171
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:20<00:00,  2.24it/s, loss=0.0568]


Epoch 5 Loss: 4.1185
Test Metrics: Precision=0.9330, Recall=0.9330, F1=0.9330

Fine-tuning roberta-large (large) with Train Size 375, Split 2...


Map: 100%|██████████| 375/375 [00:00<00:00, 8885.63 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6970.68 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 8990.18 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.14it/s, loss=0.29] 


Epoch 1 Loss: 25.4124
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:21<00:00,  2.16it/s, loss=0.304]


Epoch 2 Loss: 11.5567
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.117] 


Epoch 3 Loss: 7.2434
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:21<00:00,  2.15it/s, loss=0.0681]


Epoch 4 Loss: 4.6439
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:22<00:00,  2.07it/s, loss=0.0957]


Epoch 5 Loss: 3.3519
Test Metrics: Precision=0.9348, Recall=0.9348, F1=0.9348

Fine-tuning roberta-large (large) with Train Size 375, Split 3...


Map: 100%|██████████| 375/375 [00:00<00:00, 8678.06 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 9068.37 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9090.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.259]


Epoch 1 Loss: 22.8588
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.08it/s, loss=0.172]


Epoch 2 Loss: 10.1413
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.11it/s, loss=0.19]  


Epoch 3 Loss: 6.9823
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.12it/s, loss=0.0441]


Epoch 4 Loss: 4.2266
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:22<00:00,  2.07it/s, loss=0.0524]


Epoch 5 Loss: 3.0187
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 375, Split 4...


Map: 100%|██████████| 375/375 [00:00<00:00, 8453.58 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7060.80 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9025.16 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:21<00:00,  2.17it/s, loss=0.501]


Epoch 1 Loss: 24.8157
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.11it/s, loss=0.133]


Epoch 2 Loss: 11.5125
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:22<00:00,  2.13it/s, loss=0.135] 


Epoch 3 Loss: 7.5438
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:21<00:00,  2.20it/s, loss=0.0748]


Epoch 4 Loss: 4.9982
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:21<00:00,  2.19it/s, loss=0.0525]


Epoch 5 Loss: 3.6048
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 375, Split 5...


Map: 100%|██████████| 375/375 [00:00<00:00, 8067.62 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6873.50 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9289.63 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 47/47 [00:23<00:00,  2.01it/s, loss=0.939]


Epoch 1 Loss: 46.9598
Epoch 2/5


Training Epoch 2: 100%|██████████| 47/47 [00:22<00:00,  2.05it/s, loss=0.912]


Epoch 2 Loss: 40.5876
Epoch 3/5


Training Epoch 3: 100%|██████████| 47/47 [00:23<00:00,  2.02it/s, loss=0.623]


Epoch 3 Loss: 29.6969
Epoch 4/5


Training Epoch 4: 100%|██████████| 47/47 [00:22<00:00,  2.05it/s, loss=0.406]


Epoch 4 Loss: 21.3968
Epoch 5/5


Training Epoch 5: 100%|██████████| 47/47 [00:22<00:00,  2.09it/s, loss=0.365]


Epoch 5 Loss: 17.6996
Test Metrics: Precision=0.8839, Recall=0.8839, F1=0.8839

Fine-tuning roberta-large (large) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 8466.45 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6921.44 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9276.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 48/48 [00:21<00:00,  2.21it/s, loss=0.334]


Epoch 1 Loss: 25.8210
Epoch 2/5


Training Epoch 2: 100%|██████████| 48/48 [00:21<00:00,  2.25it/s, loss=0.52] 


Epoch 2 Loss: 12.5260
Epoch 3/5


Training Epoch 3: 100%|██████████| 48/48 [00:21<00:00,  2.23it/s, loss=0.16]  


Epoch 3 Loss: 8.5147
Epoch 4/5


Training Epoch 4: 100%|██████████| 48/48 [00:21<00:00,  2.19it/s, loss=0.104] 


Epoch 4 Loss: 5.8710
Epoch 5/5


Training Epoch 5: 100%|██████████| 48/48 [00:21<00:00,  2.25it/s, loss=0.0422]


Epoch 5 Loss: 4.3669
Test Metrics: Precision=0.9343, Recall=0.9343, F1=0.9343

Fine-tuning roberta-large (large) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 8958.41 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7836.54 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9376.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.10it/s, loss=0.217]


Epoch 1 Loss: 24.0517
Epoch 2/5


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.11it/s, loss=0.276]


Epoch 2 Loss: 11.6855
Epoch 3/5


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.13it/s, loss=0.175] 


Epoch 3 Loss: 7.8345
Epoch 4/5


Training Epoch 4: 100%|██████████| 48/48 [00:22<00:00,  2.16it/s, loss=0.0891]


Epoch 4 Loss: 5.2900
Epoch 5/5


Training Epoch 5: 100%|██████████| 48/48 [00:21<00:00,  2.19it/s, loss=0.117] 


Epoch 5 Loss: 3.7914
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 8610.49 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8084.38 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9245.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 48/48 [00:21<00:00,  2.22it/s, loss=0.631]


Epoch 1 Loss: 25.1704
Epoch 2/5


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.17it/s, loss=0.114] 


Epoch 2 Loss: 11.4780
Epoch 3/5


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.18it/s, loss=0.102] 


Epoch 3 Loss: 7.4374
Epoch 4/5


Training Epoch 4: 100%|██████████| 48/48 [00:22<00:00,  2.15it/s, loss=0.137] 


Epoch 4 Loss: 5.0073
Epoch 5/5


Training Epoch 5: 100%|██████████| 48/48 [00:22<00:00,  2.13it/s, loss=0.078] 


Epoch 5 Loss: 3.8455
Test Metrics: Precision=0.9353, Recall=0.9353, F1=0.9353

Fine-tuning roberta-large (large) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 9189.28 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7781.26 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 3895.31 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.17it/s, loss=0.231]


Epoch 1 Loss: 26.9448
Epoch 2/5


Training Epoch 2: 100%|██████████| 48/48 [00:21<00:00,  2.19it/s, loss=0.406]


Epoch 2 Loss: 12.0946
Epoch 3/5


Training Epoch 3: 100%|██████████| 48/48 [00:21<00:00,  2.18it/s, loss=0.0736]


Epoch 3 Loss: 9.0849
Epoch 4/5


Training Epoch 4: 100%|██████████| 48/48 [00:22<00:00,  2.15it/s, loss=0.111] 


Epoch 4 Loss: 6.1635
Epoch 5/5


Training Epoch 5: 100%|██████████| 48/48 [00:21<00:00,  2.20it/s, loss=0.0867]


Epoch 5 Loss: 4.6977
Test Metrics: Precision=0.9317, Recall=0.9317, F1=0.9317

Fine-tuning roberta-large (large) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 8826.45 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6710.89 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9261.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.10it/s, loss=0.139]


Epoch 1 Loss: 25.9395
Epoch 2/5


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.02it/s, loss=0.296]


Epoch 2 Loss: 12.9476
Epoch 3/5


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.99it/s, loss=0.379] 


Epoch 3 Loss: 8.9446
Epoch 4/5


Training Epoch 4: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.102] 


Epoch 4 Loss: 5.9731
Epoch 5/5


Training Epoch 5: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s, loss=0.0902]


Epoch 5 Loss: 4.1488
Test Metrics: Precision=0.9374, Recall=0.9374, F1=0.9374

Fine-tuning roberta-large (large) with Train Size 385, Split 1...


Map: 100%|██████████| 385/385 [00:00<00:00, 8608.22 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7604.64 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9368.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.339]


Epoch 1 Loss: 26.7323
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:21<00:00,  2.26it/s, loss=0.119]


Epoch 2 Loss: 12.2301
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:21<00:00,  2.23it/s, loss=0.473] 


Epoch 3 Loss: 9.1572
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:21<00:00,  2.25it/s, loss=0.286] 


Epoch 4 Loss: 6.4259
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.015] 


Epoch 5 Loss: 4.8170
Test Metrics: Precision=0.9358, Recall=0.9358, F1=0.9358

Fine-tuning roberta-large (large) with Train Size 385, Split 2...


Map: 100%|██████████| 385/385 [00:00<00:00, 8851.07 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7113.69 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9112.15 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.11it/s, loss=0.142]


Epoch 1 Loss: 26.3466
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:21<00:00,  2.24it/s, loss=1.07] 


Epoch 2 Loss: 12.9877
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.20it/s, loss=0.208] 


Epoch 3 Loss: 8.5377
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.12it/s, loss=0.0141]


Epoch 4 Loss: 5.7931
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.16it/s, loss=0.208] 


Epoch 5 Loss: 4.4833
Test Metrics: Precision=0.9325, Recall=0.9325, F1=0.9325

Fine-tuning roberta-large (large) with Train Size 385, Split 3...


Map: 100%|██████████| 385/385 [00:00<00:00, 8826.98 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7595.34 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9076.86 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.362]


Epoch 1 Loss: 24.5615
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.16it/s, loss=0.329]


Epoch 2 Loss: 11.6947
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:23<00:00,  2.11it/s, loss=0.103] 


Epoch 3 Loss: 8.0105
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.22it/s, loss=0.0585]


Epoch 4 Loss: 5.1382
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.17it/s, loss=0.305] 


Epoch 5 Loss: 3.8686
Test Metrics: Precision=0.9373, Recall=0.9373, F1=0.9373

Fine-tuning roberta-large (large) with Train Size 385, Split 4...


Map: 100%|██████████| 385/385 [00:00<00:00, 8955.93 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 6786.33 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9083.51 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.20it/s, loss=0.597]


Epoch 1 Loss: 24.4131
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:23<00:00,  2.13it/s, loss=0.0705]


Epoch 2 Loss: 12.8273
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.15it/s, loss=0.145]


Epoch 3 Loss: 8.9595
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.13it/s, loss=0.0411]


Epoch 4 Loss: 6.1825
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.15it/s, loss=0.0412]


Epoch 5 Loss: 4.8866
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning roberta-large (large) with Train Size 385, Split 5...


Map: 100%|██████████| 385/385 [00:00<00:00, 8469.08 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 6529.88 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9243.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:24<00:00,  2.01it/s, loss=0.367]


Epoch 1 Loss: 24.6499
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:23<00:00,  2.06it/s, loss=0.162]


Epoch 2 Loss: 13.2476
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:23<00:00,  2.04it/s, loss=0.202] 


Epoch 3 Loss: 8.5920
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.14it/s, loss=0.0543]


Epoch 4 Loss: 6.1015
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:23<00:00,  2.07it/s, loss=0.162] 


Epoch 5 Loss: 4.7788
Test Metrics: Precision=0.9365, Recall=0.9365, F1=0.9365

Fine-tuning roberta-large (large) with Train Size 390, Split 1...


Map: 100%|██████████| 390/390 [00:00<00:00, 8735.57 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7819.77 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9122.71 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:21<00:00,  2.30it/s, loss=0.425]


Epoch 1 Loss: 27.2917
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:21<00:00,  2.26it/s, loss=0.192]


Epoch 2 Loss: 12.9885
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.162] 


Epoch 3 Loss: 9.2462
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:21<00:00,  2.24it/s, loss=0.114] 


Epoch 4 Loss: 6.3984
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.22it/s, loss=0.138] 


Epoch 5 Loss: 4.5645
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 390, Split 2...


Map: 100%|██████████| 390/390 [00:00<00:00, 2466.59 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7275.46 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 8830.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:21<00:00,  2.26it/s, loss=0.371]


Epoch 1 Loss: 27.6161
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.287] 


Epoch 2 Loss: 12.3764
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.20it/s, loss=0.126] 


Epoch 3 Loss: 8.0104
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.22it/s, loss=0.0387]


Epoch 4 Loss: 5.6911
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.15it/s, loss=0.0707]


Epoch 5 Loss: 4.1339
Test Metrics: Precision=0.9323, Recall=0.9323, F1=0.9323

Fine-tuning roberta-large (large) with Train Size 390, Split 3...


Map: 100%|██████████| 390/390 [00:00<00:00, 8414.97 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7556.43 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 8828.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.12it/s, loss=0.372]


Epoch 1 Loss: 24.4227
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.14it/s, loss=0.269]


Epoch 2 Loss: 12.1761
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:23<00:00,  2.08it/s, loss=0.261]


Epoch 3 Loss: 8.8223
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.13it/s, loss=0.117] 


Epoch 4 Loss: 6.7412
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:23<00:00,  2.11it/s, loss=0.0579]


Epoch 5 Loss: 4.5512
Test Metrics: Precision=0.9360, Recall=0.9360, F1=0.9360

Fine-tuning roberta-large (large) with Train Size 390, Split 4...


Map: 100%|██████████| 390/390 [00:00<00:00, 8637.50 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 6846.70 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9218.22 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:22<00:00,  2.14it/s, loss=0.383]


Epoch 1 Loss: 27.3470
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:22<00:00,  2.19it/s, loss=0.14] 


Epoch 2 Loss: 11.8778
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:22<00:00,  2.17it/s, loss=0.134] 


Epoch 3 Loss: 8.5261
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:22<00:00,  2.15it/s, loss=0.0654]


Epoch 4 Loss: 5.8144
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:22<00:00,  2.21it/s, loss=0.0401]


Epoch 5 Loss: 4.2788
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 390, Split 5...


Map: 100%|██████████| 390/390 [00:00<00:00, 8704.51 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7356.44 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9096.17 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 49/49 [00:23<00:00,  2.08it/s, loss=0.364]


Epoch 1 Loss: 26.9177
Epoch 2/5


Training Epoch 2: 100%|██████████| 49/49 [00:24<00:00,  2.03it/s, loss=0.433]


Epoch 2 Loss: 12.9344
Epoch 3/5


Training Epoch 3: 100%|██████████| 49/49 [00:24<00:00,  2.03it/s, loss=0.159] 


Epoch 3 Loss: 8.7692
Epoch 4/5


Training Epoch 4: 100%|██████████| 49/49 [00:23<00:00,  2.05it/s, loss=0.105] 


Epoch 4 Loss: 6.0636
Epoch 5/5


Training Epoch 5: 100%|██████████| 49/49 [00:23<00:00,  2.06it/s, loss=0.21]  


Epoch 5 Loss: 4.5185
Test Metrics: Precision=0.9363, Recall=0.9363, F1=0.9363

Fine-tuning roberta-large (large) with Train Size 395, Split 1...


Map: 100%|██████████| 395/395 [00:00<00:00, 8514.19 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 6811.45 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9287.11 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, loss=0.205]


Epoch 1 Loss: 26.8646
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s, loss=0.367]


Epoch 2 Loss: 13.1130
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:21<00:00,  2.28it/s, loss=0.101]


Epoch 3 Loss: 8.7047
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, loss=0.0956]


Epoch 4 Loss: 6.7737
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:21<00:00,  2.28it/s, loss=0.143] 


Epoch 5 Loss: 4.9462
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 395, Split 2...


Map: 100%|██████████| 395/395 [00:00<00:00, 8020.05 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7683.66 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9229.89 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.235]


Epoch 1 Loss: 26.1361
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, loss=0.42] 


Epoch 2 Loss: 12.9848
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.292] 


Epoch 3 Loss: 8.4411
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.221] 


Epoch 4 Loss: 5.9428
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, loss=0.0347]


Epoch 5 Loss: 4.3980
Test Metrics: Precision=0.9320, Recall=0.9320, F1=0.9320

Fine-tuning roberta-large (large) with Train Size 395, Split 3...


Map: 100%|██████████| 395/395 [00:00<00:00, 8416.39 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7284.82 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9286.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.13it/s, loss=0.37] 


Epoch 1 Loss: 27.2422
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.11it/s, loss=0.251]


Epoch 2 Loss: 13.8469
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.11it/s, loss=0.105] 


Epoch 3 Loss: 9.1595
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.215] 


Epoch 4 Loss: 6.3214
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.19]  


Epoch 5 Loss: 4.7386
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning roberta-large (large) with Train Size 395, Split 4...


Map: 100%|██████████| 395/395 [00:00<00:00, 8875.05 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7577.18 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9102.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, loss=0.169]


Epoch 1 Loss: 24.2020
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s, loss=0.0966]


Epoch 2 Loss: 11.8748
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.115] 


Epoch 3 Loss: 7.8240
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, loss=0.085] 


Epoch 4 Loss: 5.6116
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.0989]


Epoch 5 Loss: 4.1160
Test Metrics: Precision=0.9369, Recall=0.9369, F1=0.9369

Fine-tuning roberta-large (large) with Train Size 395, Split 5...


Map: 100%|██████████| 395/395 [00:00<00:00, 7799.52 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7876.91 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 3817.00 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.227]


Epoch 1 Loss: 25.7526
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.05it/s, loss=0.0717]


Epoch 2 Loss: 12.2245
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.06it/s, loss=0.0723]


Epoch 3 Loss: 8.9458
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:24<00:00,  2.03it/s, loss=0.138] 


Epoch 4 Loss: 6.3186
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s, loss=0.138] 


Epoch 5 Loss: 4.8303
Test Metrics: Precision=0.9369, Recall=0.9369, F1=0.9369

Fine-tuning roberta-large (large) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 8765.89 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7315.59 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9156.05 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.207]


Epoch 1 Loss: 23.2702
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, loss=0.205]


Epoch 2 Loss: 11.1909
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:22<00:00,  2.20it/s, loss=0.136] 


Epoch 3 Loss: 7.4692
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, loss=0.0689]


Epoch 4 Loss: 5.0162
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, loss=0.0461]


Epoch 5 Loss: 3.2629
Test Metrics: Precision=0.9393, Recall=0.9393, F1=0.9393

Fine-tuning roberta-large (large) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 8881.77 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7503.73 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9162.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, loss=0.274]


Epoch 1 Loss: 26.9035
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.21] 


Epoch 2 Loss: 12.8535
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.138] 


Epoch 3 Loss: 8.4674
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s, loss=0.0996]


Epoch 4 Loss: 5.7952
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.0351]


Epoch 5 Loss: 4.5248
Test Metrics: Precision=0.9334, Recall=0.9334, F1=0.9334

Fine-tuning roberta-large (large) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8351.24 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6596.11 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9155.03 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.22] 


Epoch 1 Loss: 24.3865
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.10it/s, loss=0.323]


Epoch 2 Loss: 12.5737
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.173] 


Epoch 3 Loss: 8.6627
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:23<00:00,  2.14it/s, loss=0.165] 


Epoch 4 Loss: 6.1665
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:23<00:00,  2.14it/s, loss=0.085] 


Epoch 5 Loss: 4.8864
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 8716.98 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6546.18 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9032.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.258]


Epoch 1 Loss: 26.4572
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.149]


Epoch 2 Loss: 12.5205
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.204] 


Epoch 3 Loss: 8.6857
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, loss=0.106] 


Epoch 4 Loss: 6.0735
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:23<00:00,  2.17it/s, loss=0.0745]


Epoch 5 Loss: 4.6787
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 8598.06 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7103.87 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9034.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.03it/s, loss=0.407]


Epoch 1 Loss: 25.9627
Epoch 2/5


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.09it/s, loss=0.421]


Epoch 2 Loss: 13.6364
Epoch 3/5


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s, loss=0.0439]


Epoch 3 Loss: 9.1003
Epoch 4/5


Training Epoch 4: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.173] 


Epoch 4 Loss: 6.1287
Epoch 5/5


Training Epoch 5: 100%|██████████| 50/50 [00:23<00:00,  2.09it/s, loss=0.104] 


Epoch 5 Loss: 4.5067
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 405, Split 1...


Map: 100%|██████████| 405/405 [00:00<00:00, 7978.87 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7016.93 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9014.89 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 51/51 [00:23<00:00,  2.19it/s, loss=0.363]


Epoch 1 Loss: 27.9941
Epoch 2/5


Training Epoch 2: 100%|██████████| 51/51 [00:22<00:00,  2.23it/s, loss=0.229]


Epoch 2 Loss: 12.5072
Epoch 3/5


Training Epoch 3: 100%|██████████| 51/51 [00:23<00:00,  2.21it/s, loss=0.139] 


Epoch 3 Loss: 8.2913
Epoch 4/5


Training Epoch 4: 100%|██████████| 51/51 [00:22<00:00,  2.29it/s, loss=0.0646]


Epoch 4 Loss: 5.2991
Epoch 5/5


Training Epoch 5: 100%|██████████| 51/51 [00:22<00:00,  2.25it/s, loss=0.0964]


Epoch 5 Loss: 3.8276
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 405, Split 2...


Map: 100%|██████████| 405/405 [00:00<00:00, 8830.62 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7883.48 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 8942.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 51/51 [00:22<00:00,  2.24it/s, loss=0.307]


Epoch 1 Loss: 24.6178
Epoch 2/5


Training Epoch 2: 100%|██████████| 51/51 [00:23<00:00,  2.21it/s, loss=0.149] 


Epoch 2 Loss: 12.1606
Epoch 3/5


Training Epoch 3: 100%|██████████| 51/51 [00:23<00:00,  2.18it/s, loss=0.0445]


Epoch 3 Loss: 7.8149
Epoch 4/5


Training Epoch 4: 100%|██████████| 51/51 [00:23<00:00,  2.20it/s, loss=0.0964]


Epoch 4 Loss: 5.0254
Epoch 5/5


Training Epoch 5: 100%|██████████| 51/51 [00:23<00:00,  2.18it/s, loss=0.107] 


Epoch 5 Loss: 3.7635
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning roberta-large (large) with Train Size 405, Split 3...


Map: 100%|██████████| 405/405 [00:00<00:00, 7861.23 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 6541.74 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9095.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 51/51 [00:23<00:00,  2.19it/s, loss=0.247]


Epoch 1 Loss: 26.6024
Epoch 2/5


Training Epoch 2: 100%|██████████| 51/51 [00:23<00:00,  2.21it/s, loss=0.418] 


Epoch 2 Loss: 11.9010
Epoch 3/5


Training Epoch 3: 100%|██████████| 51/51 [00:24<00:00,  2.09it/s, loss=0.0698]


Epoch 3 Loss: 8.1303
Epoch 4/5


Training Epoch 4: 100%|██████████| 51/51 [00:23<00:00,  2.15it/s, loss=0.304] 


Epoch 4 Loss: 5.3393
Epoch 5/5


Training Epoch 5: 100%|██████████| 51/51 [00:24<00:00,  2.12it/s, loss=0.0472]


Epoch 5 Loss: 4.1004
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 405, Split 4...


Map: 100%|██████████| 405/405 [00:00<00:00, 8864.21 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 6776.34 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9232.31 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 51/51 [00:23<00:00,  2.15it/s, loss=0.377]


Epoch 1 Loss: 27.4744
Epoch 2/5


Training Epoch 2: 100%|██████████| 51/51 [00:23<00:00,  2.15it/s, loss=0.212]


Epoch 2 Loss: 12.5439
Epoch 3/5


Training Epoch 3: 100%|██████████| 51/51 [00:23<00:00,  2.17it/s, loss=0.15]  


Epoch 3 Loss: 8.6174
Epoch 4/5


Training Epoch 4: 100%|██████████| 51/51 [00:23<00:00,  2.20it/s, loss=0.0457]


Epoch 4 Loss: 5.8074
Epoch 5/5


Training Epoch 5: 100%|██████████| 51/51 [00:23<00:00,  2.18it/s, loss=0.0385]


Epoch 5 Loss: 4.3358
Test Metrics: Precision=0.9315, Recall=0.9315, F1=0.9315

Fine-tuning roberta-large (large) with Train Size 405, Split 5...


Map: 100%|██████████| 405/405 [00:00<00:00, 8366.75 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7072.58 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9010.80 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 51/51 [00:24<00:00,  2.08it/s, loss=0.252]


Epoch 1 Loss: 26.1961
Epoch 2/5


Training Epoch 2: 100%|██████████| 51/51 [00:24<00:00,  2.12it/s, loss=0.249]


Epoch 2 Loss: 11.9515
Epoch 3/5


Training Epoch 3: 100%|██████████| 51/51 [00:24<00:00,  2.05it/s, loss=0.0738]


Epoch 3 Loss: 8.1355
Epoch 4/5


Training Epoch 4: 100%|██████████| 51/51 [00:24<00:00,  2.07it/s, loss=0.0868]


Epoch 4 Loss: 5.1524
Epoch 5/5


Training Epoch 5: 100%|██████████| 51/51 [00:24<00:00,  2.08it/s, loss=0.0201]


Epoch 5 Loss: 3.8814
Test Metrics: Precision=0.9384, Recall=0.9384, F1=0.9384

Fine-tuning roberta-large (large) with Train Size 410, Split 1...


Map: 100%|██████████| 410/410 [00:00<00:00, 8618.06 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7271.77 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9121.78 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.24it/s, loss=0.825]


Epoch 1 Loss: 25.1672
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.22it/s, loss=0.416]


Epoch 2 Loss: 19.3472
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.19it/s, loss=0.221] 


Epoch 3 Loss: 10.8009
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.24it/s, loss=0.116] 


Epoch 4 Loss: 7.7209
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.0143]


Epoch 5 Loss: 6.0231
Test Metrics: Precision=0.9316, Recall=0.9316, F1=0.9316

Fine-tuning roberta-large (large) with Train Size 410, Split 2...


Map: 100%|██████████| 410/410 [00:00<00:00, 8893.22 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 8247.00 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9200.73 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.23it/s, loss=0.162]


Epoch 1 Loss: 26.7267
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.22it/s, loss=0.314]


Epoch 2 Loss: 12.6393
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.21it/s, loss=0.206] 


Epoch 3 Loss: 8.5228
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.19it/s, loss=0.103] 


Epoch 4 Loss: 5.9266
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:23<00:00,  2.19it/s, loss=0.0616]


Epoch 5 Loss: 4.0819
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 410, Split 3...


Map: 100%|██████████| 410/410 [00:00<00:00, 8780.61 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7630.41 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 3886.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.12it/s, loss=0.208]


Epoch 1 Loss: 24.0241
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.0912]


Epoch 2 Loss: 11.1612
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:24<00:00,  2.11it/s, loss=0.0426]


Epoch 3 Loss: 7.6856
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.11it/s, loss=0.173] 


Epoch 4 Loss: 5.3658
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:24<00:00,  2.14it/s, loss=0.291] 


Epoch 5 Loss: 4.0527
Test Metrics: Precision=0.9363, Recall=0.9363, F1=0.9363

Fine-tuning roberta-large (large) with Train Size 410, Split 4...


Map: 100%|██████████| 410/410 [00:00<00:00, 8991.76 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 8006.63 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9063.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:22<00:00,  2.35it/s, loss=0.125]


Epoch 1 Loss: 25.0339
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.337]


Epoch 2 Loss: 12.2697
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:22<00:00,  2.27it/s, loss=0.172] 


Epoch 3 Loss: 7.9503
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.0979]


Epoch 4 Loss: 5.2130
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:24<00:00,  2.16it/s, loss=0.113] 


Epoch 5 Loss: 3.7363
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 410, Split 5...


Map: 100%|██████████| 410/410 [00:00<00:00, 8388.73 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 8059.54 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9011.37 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:25<00:00,  2.07it/s, loss=0.363]


Epoch 1 Loss: 25.7508
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.10it/s, loss=0.0677]


Epoch 2 Loss: 12.2573
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:25<00:00,  2.06it/s, loss=0.236] 


Epoch 3 Loss: 8.0832
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:25<00:00,  2.07it/s, loss=0.113] 


Epoch 4 Loss: 5.7593
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:24<00:00,  2.10it/s, loss=0.034] 


Epoch 5 Loss: 3.6824
Test Metrics: Precision=0.9379, Recall=0.9379, F1=0.9379

Fine-tuning roberta-large (large) with Train Size 415, Split 1...


Map: 100%|██████████| 415/415 [00:00<00:00, 8733.75 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7243.29 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 8991.62 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.22it/s, loss=0.302]


Epoch 1 Loss: 27.6535
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.16it/s, loss=0.15] 


Epoch 2 Loss: 14.5549
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.101]


Epoch 3 Loss: 9.0485
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.22it/s, loss=0.0762]


Epoch 4 Loss: 5.9497
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:23<00:00,  2.25it/s, loss=0.15]  


Epoch 5 Loss: 4.3441
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 415, Split 2...


Map: 100%|██████████| 415/415 [00:00<00:00, 9173.46 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 6824.55 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9261.08 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.474]


Epoch 1 Loss: 28.4376
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.24it/s, loss=0.306]


Epoch 2 Loss: 13.5214
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:23<00:00,  2.24it/s, loss=0.152] 


Epoch 3 Loss: 8.8704
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.106] 


Epoch 4 Loss: 6.0694
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:24<00:00,  2.13it/s, loss=0.114] 


Epoch 5 Loss: 4.3936
Test Metrics: Precision=0.9330, Recall=0.9330, F1=0.9330

Fine-tuning roberta-large (large) with Train Size 415, Split 3...


Map: 100%|██████████| 415/415 [00:00<00:00, 8997.02 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 6965.47 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9262.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.13it/s, loss=0.478]


Epoch 1 Loss: 25.1585
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:24<00:00,  2.14it/s, loss=0.264]


Epoch 2 Loss: 11.8654
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:24<00:00,  2.16it/s, loss=0.129] 


Epoch 3 Loss: 7.3449
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:24<00:00,  2.11it/s, loss=0.089] 


Epoch 4 Loss: 4.9608
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:24<00:00,  2.13it/s, loss=0.0529]


Epoch 5 Loss: 3.5936
Test Metrics: Precision=0.9361, Recall=0.9361, F1=0.9361

Fine-tuning roberta-large (large) with Train Size 415, Split 4...


Map: 100%|██████████| 415/415 [00:00<00:00, 8446.66 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7472.14 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9090.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:24<00:00,  2.15it/s, loss=0.252]


Epoch 1 Loss: 25.2212
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:23<00:00,  2.20it/s, loss=0.175] 


Epoch 2 Loss: 11.2722
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:24<00:00,  2.14it/s, loss=0.24]  


Epoch 3 Loss: 7.7337
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:23<00:00,  2.18it/s, loss=0.227] 


Epoch 4 Loss: 5.1581
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:24<00:00,  2.13it/s, loss=0.15]  


Epoch 5 Loss: 3.7700
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 415, Split 5...


Map: 100%|██████████| 415/415 [00:00<00:00, 8376.90 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7061.26 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9120.72 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 52/52 [00:25<00:00,  2.05it/s, loss=0.39] 


Epoch 1 Loss: 25.7929
Epoch 2/5


Training Epoch 2: 100%|██████████| 52/52 [00:25<00:00,  2.00it/s, loss=0.315]


Epoch 2 Loss: 12.7683
Epoch 3/5


Training Epoch 3: 100%|██████████| 52/52 [00:25<00:00,  2.06it/s, loss=0.111] 


Epoch 3 Loss: 8.5039
Epoch 4/5


Training Epoch 4: 100%|██████████| 52/52 [00:26<00:00,  1.97it/s, loss=0.087] 


Epoch 4 Loss: 5.8075
Epoch 5/5


Training Epoch 5: 100%|██████████| 52/52 [00:25<00:00,  2.07it/s, loss=0.0855]


Epoch 5 Loss: 4.0852
Test Metrics: Precision=0.9387, Recall=0.9387, F1=0.9387

Fine-tuning roberta-large (large) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 8837.87 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8217.60 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9000.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.20it/s, loss=0.751]


Epoch 1 Loss: 27.1140
Epoch 2/5


Training Epoch 2: 100%|██████████| 53/53 [00:23<00:00,  2.24it/s, loss=0.158]


Epoch 2 Loss: 12.1754
Epoch 3/5


Training Epoch 3: 100%|██████████| 53/53 [00:23<00:00,  2.23it/s, loss=0.104] 


Epoch 3 Loss: 7.6468
Epoch 4/5


Training Epoch 4: 100%|██████████| 53/53 [00:23<00:00,  2.24it/s, loss=0.0363]


Epoch 4 Loss: 4.6823
Epoch 5/5


Training Epoch 5: 100%|██████████| 53/53 [00:23<00:00,  2.21it/s, loss=0.0264]


Epoch 5 Loss: 3.2892
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 9064.24 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7562.82 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 8809.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.17it/s, loss=0.306]


Epoch 1 Loss: 26.8729
Epoch 2/5


Training Epoch 2: 100%|██████████| 53/53 [00:23<00:00,  2.23it/s, loss=0.149]


Epoch 2 Loss: 13.3012
Epoch 3/5


Training Epoch 3: 100%|██████████| 53/53 [00:23<00:00,  2.25it/s, loss=0.0711]


Epoch 3 Loss: 8.8991
Epoch 4/5


Training Epoch 4: 100%|██████████| 53/53 [00:23<00:00,  2.24it/s, loss=0.0769]


Epoch 4 Loss: 5.7746
Epoch 5/5


Training Epoch 5: 100%|██████████| 53/53 [00:24<00:00,  2.19it/s, loss=0.0762]


Epoch 5 Loss: 4.3502
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8901.23 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7158.24 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9391.53 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.17it/s, loss=0.325]


Epoch 1 Loss: 25.5865
Epoch 2/5


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.16it/s, loss=0.318] 


Epoch 2 Loss: 11.9715
Epoch 3/5


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.09it/s, loss=0.183] 


Epoch 3 Loss: 7.6673
Epoch 4/5


Training Epoch 4: 100%|██████████| 53/53 [00:24<00:00,  2.14it/s, loss=0.0901]


Epoch 4 Loss: 5.1030
Epoch 5/5


Training Epoch 5: 100%|██████████| 53/53 [00:24<00:00,  2.18it/s, loss=0.0543]


Epoch 5 Loss: 3.5529
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 8404.62 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7658.16 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9192.26 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 53/53 [00:23<00:00,  2.22it/s, loss=0.487]


Epoch 1 Loss: 25.5541
Epoch 2/5


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.18it/s, loss=0.179]


Epoch 2 Loss: 12.0246
Epoch 3/5


Training Epoch 3: 100%|██████████| 53/53 [00:24<00:00,  2.18it/s, loss=0.0578]


Epoch 3 Loss: 7.7522
Epoch 4/5


Training Epoch 4: 100%|██████████| 53/53 [00:24<00:00,  2.20it/s, loss=0.0536]


Epoch 4 Loss: 5.1100
Epoch 5/5


Training Epoch 5: 100%|██████████| 53/53 [00:24<00:00,  2.19it/s, loss=0.18]  


Epoch 5 Loss: 3.8149
Test Metrics: Precision=0.9330, Recall=0.9330, F1=0.9330

Fine-tuning roberta-large (large) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 8592.62 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7483.62 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9145.52 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  2.01it/s, loss=0.209]


Epoch 1 Loss: 28.7253
Epoch 2/5


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  2.03it/s, loss=0.333]


Epoch 2 Loss: 14.9643
Epoch 3/5


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  1.99it/s, loss=0.143] 


Epoch 3 Loss: 9.6920
Epoch 4/5


Training Epoch 4: 100%|██████████| 53/53 [00:25<00:00,  2.05it/s, loss=0.0536]


Epoch 4 Loss: 6.3916
Epoch 5/5


Training Epoch 5: 100%|██████████| 53/53 [00:26<00:00,  2.02it/s, loss=0.0917]


Epoch 5 Loss: 4.7204
Test Metrics: Precision=0.9378, Recall=0.9378, F1=0.9378

Fine-tuning roberta-large (large) with Train Size 425, Split 1...


Map: 100%|██████████| 425/425 [00:00<00:00, 8530.80 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 8333.70 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 8847.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:23<00:00,  2.32it/s, loss=0.0292]


Epoch 1 Loss: 26.8062
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.20it/s, loss=0.0499]


Epoch 2 Loss: 13.1062
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:23<00:00,  2.29it/s, loss=0.109] 


Epoch 3 Loss: 9.2910
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.24it/s, loss=0.148] 


Epoch 4 Loss: 7.0683
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:23<00:00,  2.29it/s, loss=0.0894]


Epoch 5 Loss: 4.6484
Test Metrics: Precision=0.9353, Recall=0.9353, F1=0.9353

Fine-tuning roberta-large (large) with Train Size 425, Split 2...


Map: 100%|██████████| 425/425 [00:00<00:00, 8869.26 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 6060.82 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9045.26 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.832]


Epoch 1 Loss: 28.6218
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.25it/s, loss=0.0139]


Epoch 2 Loss: 13.9695
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.0932]


Epoch 3 Loss: 9.0684
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.21it/s, loss=0.0582]


Epoch 4 Loss: 6.1829
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:23<00:00,  2.28it/s, loss=0.172] 


Epoch 5 Loss: 4.5432
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 425, Split 3...


Map: 100%|██████████| 425/425 [00:00<00:00, 8440.56 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 6692.62 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9511.42 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:25<00:00,  2.14it/s, loss=0.566]


Epoch 1 Loss: 29.9077
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.14it/s, loss=0.0095]


Epoch 2 Loss: 14.7226
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.0787]


Epoch 3 Loss: 9.4620
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:25<00:00,  2.08it/s, loss=0.128] 


Epoch 4 Loss: 6.7610
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:24<00:00,  2.19it/s, loss=0.0339]


Epoch 5 Loss: 5.0173
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 425, Split 4...


Map: 100%|██████████| 425/425 [00:00<00:00, 8718.26 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 8275.09 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 8928.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:23<00:00,  2.26it/s, loss=0.0685]


Epoch 1 Loss: 25.6970
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.19it/s, loss=0.0419]


Epoch 2 Loss: 14.7922
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.169] 


Epoch 3 Loss: 10.1588
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.19it/s, loss=0.00966]


Epoch 4 Loss: 7.1623
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:23<00:00,  2.26it/s, loss=0.223] 


Epoch 5 Loss: 6.0646
Test Metrics: Precision=0.9326, Recall=0.9326, F1=0.9326

Fine-tuning roberta-large (large) with Train Size 425, Split 5...


Map: 100%|██████████| 425/425 [00:00<00:00, 8137.70 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 7402.43 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9284.69 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:26<00:00,  2.04it/s, loss=0.129]


Epoch 1 Loss: 26.5956
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:26<00:00,  2.04it/s, loss=0.478]


Epoch 2 Loss: 12.4693
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:26<00:00,  2.06it/s, loss=0.0187]


Epoch 3 Loss: 7.9860
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:25<00:00,  2.10it/s, loss=0.134] 


Epoch 4 Loss: 5.7117
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:26<00:00,  2.04it/s, loss=0.186] 


Epoch 5 Loss: 3.9698
Test Metrics: Precision=0.9385, Recall=0.9385, F1=0.9385

Fine-tuning roberta-large (large) with Train Size 430, Split 1...


Map: 100%|██████████| 430/430 [00:00<00:00, 8721.11 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7324.06 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9347.35 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.455]


Epoch 1 Loss: 28.8584
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.412]


Epoch 2 Loss: 14.9841
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.20it/s, loss=0.152]


Epoch 3 Loss: 10.9491
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:23<00:00,  2.28it/s, loss=0.115] 


Epoch 4 Loss: 7.2684
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.12]  


Epoch 5 Loss: 5.6201
Test Metrics: Precision=0.9300, Recall=0.9300, F1=0.9300

Fine-tuning roberta-large (large) with Train Size 430, Split 2...


Map: 100%|██████████| 430/430 [00:00<00:00, 8839.55 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 6542.66 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 8998.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.305]


Epoch 1 Loss: 34.5714
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:24<00:00,  2.20it/s, loss=0.155]


Epoch 2 Loss: 19.8217
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.187]


Epoch 3 Loss: 12.6181
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:24<00:00,  2.22it/s, loss=0.132] 


Epoch 4 Loss: 9.7902
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.168] 


Epoch 5 Loss: 8.2625
Test Metrics: Precision=0.9263, Recall=0.9263, F1=0.9263

Fine-tuning roberta-large (large) with Train Size 430, Split 3...


Map: 100%|██████████| 430/430 [00:00<00:00, 8559.03 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7400.40 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9384.95 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:25<00:00,  2.08it/s, loss=0.259]


Epoch 1 Loss: 27.6293
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.13it/s, loss=0.0991]


Epoch 2 Loss: 12.0826
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:26<00:00,  2.05it/s, loss=0.103] 


Epoch 3 Loss: 8.4008
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:25<00:00,  2.11it/s, loss=0.0655]


Epoch 4 Loss: 5.5753
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:25<00:00,  2.10it/s, loss=0.0848]


Epoch 5 Loss: 3.8517
Test Metrics: Precision=0.9363, Recall=0.9363, F1=0.9363

Fine-tuning roberta-large (large) with Train Size 430, Split 4...


Map: 100%|██████████| 430/430 [00:00<00:00, 8614.30 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 6952.51 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9290.50 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:24<00:00,  2.18it/s, loss=0.306]


Epoch 1 Loss: 31.0298
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:25<00:00,  2.15it/s, loss=0.384]


Epoch 2 Loss: 14.7275
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:25<00:00,  2.10it/s, loss=0.163] 


Epoch 3 Loss: 9.8065
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:25<00:00,  2.11it/s, loss=0.195] 


Epoch 4 Loss: 6.9134
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:25<00:00,  2.12it/s, loss=0.0575]


Epoch 5 Loss: 6.0781
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning roberta-large (large) with Train Size 430, Split 5...


Map: 100%|██████████| 430/430 [00:00<00:00, 8171.18 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 6859.95 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9284.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 54/54 [00:26<00:00,  2.05it/s, loss=0.345]


Epoch 1 Loss: 27.8585
Epoch 2/5


Training Epoch 2: 100%|██████████| 54/54 [00:26<00:00,  2.02it/s, loss=0.243]


Epoch 2 Loss: 12.6319
Epoch 3/5


Training Epoch 3: 100%|██████████| 54/54 [00:26<00:00,  2.03it/s, loss=0.108] 


Epoch 3 Loss: 8.3083
Epoch 4/5


Training Epoch 4: 100%|██████████| 54/54 [00:26<00:00,  2.01it/s, loss=0.296] 


Epoch 4 Loss: 5.6902
Epoch 5/5


Training Epoch 5: 100%|██████████| 54/54 [00:26<00:00,  2.07it/s, loss=0.0887]


Epoch 5 Loss: 3.8727
Test Metrics: Precision=0.9395, Recall=0.9395, F1=0.9395

Fine-tuning roberta-large (large) with Train Size 435, Split 1...


Map: 100%|██████████| 435/435 [00:00<00:00, 9053.44 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7765.58 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9156.00 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:23<00:00,  2.30it/s, loss=0.29] 


Epoch 1 Loss: 25.4834
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:24<00:00,  2.25it/s, loss=0.283] 


Epoch 2 Loss: 12.0094
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:24<00:00,  2.23it/s, loss=0.935] 


Epoch 3 Loss: 8.1509
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:23<00:00,  2.31it/s, loss=0.0134]


Epoch 4 Loss: 5.4275
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:24<00:00,  2.26it/s, loss=0.111] 


Epoch 5 Loss: 3.4906
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 435, Split 2...


Map: 100%|██████████| 435/435 [00:00<00:00, 8447.57 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 6842.51 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 8880.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:24<00:00,  2.23it/s, loss=0.175]


Epoch 1 Loss: 27.4332
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.16it/s, loss=0.161] 


Epoch 2 Loss: 12.7233
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:24<00:00,  2.20it/s, loss=0.1]   


Epoch 3 Loss: 8.4046
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:24<00:00,  2.26it/s, loss=0.0827]


Epoch 4 Loss: 5.9047
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:25<00:00,  2.17it/s, loss=0.0969]


Epoch 5 Loss: 3.9634
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning roberta-large (large) with Train Size 435, Split 3...


Map: 100%|██████████| 435/435 [00:00<00:00, 8536.70 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 8027.99 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9285.49 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.08it/s, loss=0.203]


Epoch 1 Loss: 25.8601
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.405] 


Epoch 2 Loss: 12.8119
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.125] 


Epoch 3 Loss: 8.0315
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.0791]


Epoch 4 Loss: 5.4334
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.074] 


Epoch 5 Loss: 4.1274
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 435, Split 4...


Map: 100%|██████████| 435/435 [00:00<00:00, 8486.39 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 6955.20 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9416.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.13it/s, loss=0.436]


Epoch 1 Loss: 25.5873
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.14it/s, loss=0.095]


Epoch 2 Loss: 13.2824
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.13it/s, loss=0.103] 


Epoch 3 Loss: 8.4510
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:25<00:00,  2.13it/s, loss=0.0394]


Epoch 4 Loss: 5.3712
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:25<00:00,  2.17it/s, loss=0.15]  


Epoch 5 Loss: 3.7742
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 435, Split 5...


Map: 100%|██████████| 435/435 [00:00<00:00, 8627.07 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 6922.34 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9273.87 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.04it/s, loss=0.336]


Epoch 1 Loss: 26.6768
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.03it/s, loss=0.098] 


Epoch 2 Loss: 12.4555
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.04it/s, loss=0.139] 


Epoch 3 Loss: 8.3942
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:26<00:00,  2.08it/s, loss=0.104] 


Epoch 4 Loss: 5.7371
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:26<00:00,  2.08it/s, loss=0.0333]


Epoch 5 Loss: 3.9539
Test Metrics: Precision=0.9387, Recall=0.9387, F1=0.9387

Fine-tuning roberta-large (large) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 8479.30 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7816.91 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 8928.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:24<00:00,  2.22it/s, loss=0.209]


Epoch 1 Loss: 27.0255
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:24<00:00,  2.23it/s, loss=0.124] 


Epoch 2 Loss: 12.5428
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:24<00:00,  2.21it/s, loss=0.167] 


Epoch 3 Loss: 8.6912
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:24<00:00,  2.25it/s, loss=0.103] 


Epoch 4 Loss: 5.7790
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:24<00:00,  2.24it/s, loss=0.0784]


Epoch 5 Loss: 4.3551
Test Metrics: Precision=0.9367, Recall=0.9367, F1=0.9367

Fine-tuning roberta-large (large) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 7995.73 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7512.39 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9184.80 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.17it/s, loss=0.175]


Epoch 1 Loss: 27.6119
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.12it/s, loss=0.152]


Epoch 2 Loss: 12.6113
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:24<00:00,  2.21it/s, loss=0.085] 


Epoch 3 Loss: 7.8574
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:25<00:00,  2.20it/s, loss=0.0819]


Epoch 4 Loss: 5.2806
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:24<00:00,  2.24it/s, loss=0.0957]


Epoch 5 Loss: 3.5712
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8465.88 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7169.75 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9421.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.09it/s, loss=0.295]


Epoch 1 Loss: 27.6436
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.05it/s, loss=0.152] 


Epoch 2 Loss: 12.0749
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.10it/s, loss=0.0827]


Epoch 3 Loss: 8.2149
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.0776]


Epoch 4 Loss: 5.7216
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.0796]


Epoch 5 Loss: 3.9423
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 8633.20 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7024.70 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9236.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.12it/s, loss=0.31] 


Epoch 1 Loss: 25.4343
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.11]  


Epoch 2 Loss: 12.1624
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.125] 


Epoch 3 Loss: 8.3385
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:26<00:00,  2.10it/s, loss=0.135] 


Epoch 4 Loss: 6.3295
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:26<00:00,  2.09it/s, loss=0.233] 


Epoch 5 Loss: 4.6199
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning roberta-large (large) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 8142.09 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7768.54 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9076.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.389]


Epoch 1 Loss: 27.1294
Epoch 2/5


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.03it/s, loss=0.28]  


Epoch 2 Loss: 12.8693
Epoch 3/5


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  2.01it/s, loss=0.108] 


Epoch 3 Loss: 8.2412
Epoch 4/5


Training Epoch 4: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.156] 


Epoch 4 Loss: 5.1107
Epoch 5/5


Training Epoch 5: 100%|██████████| 55/55 [00:26<00:00,  2.07it/s, loss=0.0765]


Epoch 5 Loss: 3.3581
Test Metrics: Precision=0.9387, Recall=0.9387, F1=0.9387

Fine-tuning roberta-large (large) with Train Size 445, Split 1...


Map: 100%|██████████| 445/445 [00:00<00:00, 8474.42 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 5489.20 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9370.84 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 56/56 [00:25<00:00,  2.22it/s, loss=0.226]


Epoch 1 Loss: 27.5848
Epoch 2/5


Training Epoch 2: 100%|██████████| 56/56 [00:25<00:00,  2.22it/s, loss=0.184]


Epoch 2 Loss: 13.2944
Epoch 3/5


Training Epoch 3: 100%|██████████| 56/56 [00:24<00:00,  2.26it/s, loss=0.18]  


Epoch 3 Loss: 9.1338
Epoch 4/5


Training Epoch 4: 100%|██████████| 56/56 [00:24<00:00,  2.27it/s, loss=0.0825]


Epoch 4 Loss: 5.9792
Epoch 5/5


Training Epoch 5: 100%|██████████| 56/56 [00:24<00:00,  2.25it/s, loss=0.107] 


Epoch 5 Loss: 4.4624
Test Metrics: Precision=0.9343, Recall=0.9343, F1=0.9343

Fine-tuning roberta-large (large) with Train Size 445, Split 2...


Map: 100%|██████████| 445/445 [00:00<00:00, 8672.20 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 6942.27 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9263.67 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 56/56 [00:25<00:00,  2.15it/s, loss=0.193]


Epoch 1 Loss: 28.3554
Epoch 2/5


Training Epoch 2: 100%|██████████| 56/56 [00:25<00:00,  2.18it/s, loss=0.132] 


Epoch 2 Loss: 13.3285
Epoch 3/5


Training Epoch 3: 100%|██████████| 56/56 [00:25<00:00,  2.22it/s, loss=0.13]  


Epoch 3 Loss: 8.7692
Epoch 4/5


Training Epoch 4: 100%|██████████| 56/56 [00:25<00:00,  2.20it/s, loss=0.117] 


Epoch 4 Loss: 5.9121
Epoch 5/5


Training Epoch 5: 100%|██████████| 56/56 [00:26<00:00,  2.12it/s, loss=0.0336]


Epoch 5 Loss: 4.4280
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 445, Split 3...


Map: 100%|██████████| 445/445 [00:00<00:00, 8666.76 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 6583.30 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9439.84 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 56/56 [00:26<00:00,  2.11it/s, loss=0.244]


Epoch 1 Loss: 27.0995
Epoch 2/5


Training Epoch 2: 100%|██████████| 56/56 [00:25<00:00,  2.16it/s, loss=0.159]


Epoch 2 Loss: 13.2975
Epoch 3/5


Training Epoch 3: 100%|██████████| 56/56 [00:25<00:00,  2.18it/s, loss=0.114] 


Epoch 3 Loss: 8.9266
Epoch 4/5


Training Epoch 4: 100%|██████████| 56/56 [00:26<00:00,  2.10it/s, loss=0.0954]


Epoch 4 Loss: 6.1883
Epoch 5/5


Training Epoch 5: 100%|██████████| 56/56 [00:25<00:00,  2.16it/s, loss=0.0251]


Epoch 5 Loss: 4.1424
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 445, Split 4...


Map: 100%|██████████| 445/445 [00:00<00:00, 8708.41 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7971.92 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9299.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 56/56 [00:26<00:00,  2.13it/s, loss=0.16] 


Epoch 1 Loss: 26.8164
Epoch 2/5


Training Epoch 2: 100%|██████████| 56/56 [00:25<00:00,  2.16it/s, loss=0.13] 


Epoch 2 Loss: 15.5082
Epoch 3/5


Training Epoch 3: 100%|██████████| 56/56 [00:27<00:00,  2.07it/s, loss=0.171] 


Epoch 3 Loss: 11.6571
Epoch 4/5


Training Epoch 4: 100%|██████████| 56/56 [00:26<00:00,  2.12it/s, loss=0.175] 


Epoch 4 Loss: 9.3682
Epoch 5/5


Training Epoch 5: 100%|██████████| 56/56 [00:26<00:00,  2.09it/s, loss=0.0958]


Epoch 5 Loss: 7.6527
Test Metrics: Precision=0.9303, Recall=0.9303, F1=0.9303

Fine-tuning roberta-large (large) with Train Size 445, Split 5...


Map: 100%|██████████| 445/445 [00:00<00:00, 8705.41 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7879.04 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9418.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 56/56 [00:27<00:00,  2.00it/s, loss=0.372]


Epoch 1 Loss: 26.4972
Epoch 2/5


Training Epoch 2: 100%|██████████| 56/56 [00:27<00:00,  2.06it/s, loss=0.259]


Epoch 2 Loss: 13.3599
Epoch 3/5


Training Epoch 3: 100%|██████████| 56/56 [00:27<00:00,  2.04it/s, loss=0.0989]


Epoch 3 Loss: 9.5286
Epoch 4/5


Training Epoch 4: 100%|██████████| 56/56 [00:27<00:00,  2.00it/s, loss=0.0257]


Epoch 4 Loss: 6.5859
Epoch 5/5


Training Epoch 5: 100%|██████████| 56/56 [00:28<00:00,  1.97it/s, loss=0.0572]


Epoch 5 Loss: 5.2310
Test Metrics: Precision=0.9385, Recall=0.9385, F1=0.9385

Fine-tuning roberta-large (large) with Train Size 450, Split 1...


Map: 100%|██████████| 450/450 [00:00<00:00, 9062.10 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7459.93 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9058.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.109]


Epoch 1 Loss: 29.4407
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:24<00:00,  2.30it/s, loss=0.0215]


Epoch 2 Loss: 14.0992
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:25<00:00,  2.28it/s, loss=0.102] 


Epoch 3 Loss: 9.5716
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.28it/s, loss=0.0642]


Epoch 4 Loss: 7.0052
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:25<00:00,  2.27it/s, loss=0.319] 


Epoch 5 Loss: 5.4530
Test Metrics: Precision=0.9343, Recall=0.9343, F1=0.9343

Fine-tuning roberta-large (large) with Train Size 450, Split 2...


Map: 100%|██████████| 450/450 [00:00<00:00, 8374.98 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7068.26 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9098.02 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.18it/s, loss=0.142]


Epoch 1 Loss: 25.5915
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:25<00:00,  2.22it/s, loss=0.0991]


Epoch 2 Loss: 12.8054
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.15it/s, loss=0.121] 


Epoch 3 Loss: 8.2793
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.20it/s, loss=0.00927]


Epoch 4 Loss: 5.4008
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:26<00:00,  2.17it/s, loss=0.0478]


Epoch 5 Loss: 3.9362
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 450, Split 3...


Map: 100%|██████████| 450/450 [00:00<00:00, 2629.32 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 8254.34 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9043.74 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.16it/s, loss=0.148]


Epoch 1 Loss: 28.4965
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:27<00:00,  2.11it/s, loss=0.223]


Epoch 2 Loss: 14.3899
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.15it/s, loss=0.133] 


Epoch 3 Loss: 11.0197
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:26<00:00,  2.13it/s, loss=0.0869]


Epoch 4 Loss: 7.8359
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:26<00:00,  2.16it/s, loss=0.0557]


Epoch 5 Loss: 6.1388
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 450, Split 4...


Map: 100%|██████████| 450/450 [00:00<00:00, 8725.25 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7226.30 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9239.73 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.15it/s, loss=0.325]


Epoch 1 Loss: 28.5445
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:27<00:00,  2.08it/s, loss=0.223]


Epoch 2 Loss: 13.3415
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.14it/s, loss=0.188] 


Epoch 3 Loss: 9.0892
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:26<00:00,  2.18it/s, loss=0.0462]


Epoch 4 Loss: 6.5915
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:26<00:00,  2.15it/s, loss=0.0117]


Epoch 5 Loss: 4.4182
Test Metrics: Precision=0.9346, Recall=0.9346, F1=0.9346

Fine-tuning roberta-large (large) with Train Size 450, Split 5...


Map: 100%|██████████| 450/450 [00:00<00:00, 8503.31 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7255.60 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9372.78 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:27<00:00,  2.06it/s, loss=0.0853]


Epoch 1 Loss: 29.9499
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:28<00:00,  2.04it/s, loss=0.375]


Epoch 2 Loss: 14.2978
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:27<00:00,  2.05it/s, loss=0.0925]


Epoch 3 Loss: 9.0617
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:27<00:00,  2.11it/s, loss=0.0622]


Epoch 4 Loss: 5.9460
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:27<00:00,  2.06it/s, loss=0.0531]


Epoch 5 Loss: 3.9780
Test Metrics: Precision=0.9398, Recall=0.9398, F1=0.9398

Fine-tuning roberta-large (large) with Train Size 455, Split 1...


Map: 100%|██████████| 455/455 [00:00<00:00, 8942.74 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7519.49 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 8935.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:25<00:00,  2.20it/s, loss=0.254]


Epoch 1 Loss: 27.1684
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.308]


Epoch 2 Loss: 12.2015
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:25<00:00,  2.24it/s, loss=0.185] 


Epoch 3 Loss: 8.3231
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.23it/s, loss=0.0431]


Epoch 4 Loss: 5.5757
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:25<00:00,  2.24it/s, loss=0.0583]


Epoch 5 Loss: 3.9006
Test Metrics: Precision=0.9352, Recall=0.9352, F1=0.9352

Fine-tuning roberta-large (large) with Train Size 455, Split 2...


Map: 100%|██████████| 455/455 [00:00<00:00, 8740.69 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7682.34 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9346.80 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.16it/s, loss=0.491]


Epoch 1 Loss: 30.6289
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:25<00:00,  2.21it/s, loss=0.314]


Epoch 2 Loss: 15.3061
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:24<00:00,  2.29it/s, loss=0.183] 


Epoch 3 Loss: 9.8620
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:25<00:00,  2.22it/s, loss=0.0865]


Epoch 4 Loss: 6.7043
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:26<00:00,  2.19it/s, loss=0.175] 


Epoch 5 Loss: 4.7668
Test Metrics: Precision=0.9368, Recall=0.9368, F1=0.9368

Fine-tuning roberta-large (large) with Train Size 455, Split 3...


Map: 100%|██████████| 455/455 [00:00<00:00, 8450.75 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7711.83 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9172.02 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.13it/s, loss=0.388]


Epoch 1 Loss: 27.8703
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:27<00:00,  2.08it/s, loss=0.146] 


Epoch 2 Loss: 12.6772
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.17it/s, loss=0.217] 


Epoch 3 Loss: 8.8462
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:26<00:00,  2.12it/s, loss=0.0615]


Epoch 4 Loss: 5.9810
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:26<00:00,  2.11it/s, loss=0.0565]


Epoch 5 Loss: 4.2795
Test Metrics: Precision=0.9364, Recall=0.9364, F1=0.9364

Fine-tuning roberta-large (large) with Train Size 455, Split 4...


Map: 100%|██████████| 455/455 [00:00<00:00, 8905.94 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7861.13 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9450.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:26<00:00,  2.16it/s, loss=0.237]


Epoch 1 Loss: 28.7507
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:26<00:00,  2.17it/s, loss=0.128]


Epoch 2 Loss: 13.3110
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:26<00:00,  2.13it/s, loss=0.0878]


Epoch 3 Loss: 8.6019
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:27<00:00,  2.11it/s, loss=0.32]  


Epoch 4 Loss: 5.9815
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:26<00:00,  2.14it/s, loss=0.129] 


Epoch 5 Loss: 4.5887
Test Metrics: Precision=0.9353, Recall=0.9353, F1=0.9353

Fine-tuning roberta-large (large) with Train Size 455, Split 5...


Map: 100%|██████████| 455/455 [00:00<00:00, 8070.88 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 8803.84 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9131.57 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 57/57 [00:27<00:00,  2.06it/s, loss=0.36] 


Epoch 1 Loss: 27.9625
Epoch 2/5


Training Epoch 2: 100%|██████████| 57/57 [00:27<00:00,  2.04it/s, loss=0.221]


Epoch 2 Loss: 14.9416
Epoch 3/5


Training Epoch 3: 100%|██████████| 57/57 [00:28<00:00,  2.03it/s, loss=0.175] 


Epoch 3 Loss: 9.1215
Epoch 4/5


Training Epoch 4: 100%|██████████| 57/57 [00:28<00:00,  2.01it/s, loss=0.0651]


Epoch 4 Loss: 6.6251
Epoch 5/5


Training Epoch 5: 100%|██████████| 57/57 [00:28<00:00,  2.02it/s, loss=0.0665]


Epoch 5 Loss: 4.6116
Test Metrics: Precision=0.9386, Recall=0.9386, F1=0.9386

Fine-tuning roberta-large (large) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 8825.02 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7137.26 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9163.79 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 58/58 [00:25<00:00,  2.26it/s, loss=0.275]


Epoch 1 Loss: 28.8572
Epoch 2/5


Training Epoch 2: 100%|██████████| 58/58 [00:25<00:00,  2.24it/s, loss=0.193] 


Epoch 2 Loss: 14.7531
Epoch 3/5


Training Epoch 3: 100%|██████████| 58/58 [00:25<00:00,  2.31it/s, loss=0.177] 


Epoch 3 Loss: 9.6829
Epoch 4/5


Training Epoch 4: 100%|██████████| 58/58 [00:25<00:00,  2.27it/s, loss=0.0828]


Epoch 4 Loss: 6.2441
Epoch 5/5


Training Epoch 5: 100%|██████████| 58/58 [00:25<00:00,  2.29it/s, loss=0.132] 


Epoch 5 Loss: 4.7101
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 8070.90 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7379.25 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9453.73 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 58/58 [00:26<00:00,  2.20it/s, loss=0.361]


Epoch 1 Loss: 29.4293
Epoch 2/5


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.14it/s, loss=0.135]


Epoch 2 Loss: 13.8665
Epoch 3/5


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.20it/s, loss=0.0502]


Epoch 3 Loss: 9.1390
Epoch 4/5


Training Epoch 4: 100%|██████████| 58/58 [00:26<00:00,  2.20it/s, loss=0.127] 


Epoch 4 Loss: 5.7372
Epoch 5/5


Training Epoch 5: 100%|██████████| 58/58 [00:26<00:00,  2.23it/s, loss=0.0798]


Epoch 5 Loss: 4.2682
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning roberta-large (large) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8587.50 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7780.23 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9417.38 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.06it/s, loss=0.26] 


Epoch 1 Loss: 27.7179
Epoch 2/5


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.07it/s, loss=0.188]


Epoch 2 Loss: 13.0673
Epoch 3/5


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.15it/s, loss=0.15]  


Epoch 3 Loss: 8.3812
Epoch 4/5


Training Epoch 4: 100%|██████████| 58/58 [00:27<00:00,  2.14it/s, loss=0.0566]


Epoch 4 Loss: 6.1755
Epoch 5/5


Training Epoch 5: 100%|██████████| 58/58 [00:27<00:00,  2.09it/s, loss=0.069] 


Epoch 5 Loss: 4.0554
Test Metrics: Precision=0.9349, Recall=0.9349, F1=0.9349

Fine-tuning roberta-large (large) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 8655.15 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7856.74 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9169.52 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 58/58 [00:26<00:00,  2.17it/s, loss=0.202]


Epoch 1 Loss: 30.1747
Epoch 2/5


Training Epoch 2: 100%|██████████| 58/58 [00:26<00:00,  2.21it/s, loss=0.178]


Epoch 2 Loss: 13.9137
Epoch 3/5


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.18it/s, loss=0.0514]


Epoch 3 Loss: 9.2763
Epoch 4/5


Training Epoch 4: 100%|██████████| 58/58 [00:26<00:00,  2.22it/s, loss=0.0907]


Epoch 4 Loss: 6.4381
Epoch 5/5


Training Epoch 5: 100%|██████████| 58/58 [00:26<00:00,  2.15it/s, loss=0.0392]


Epoch 5 Loss: 5.0399
Test Metrics: Precision=0.9335, Recall=0.9335, F1=0.9335

Fine-tuning roberta-large (large) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 8267.08 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6452.45 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9346.16 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.11it/s, loss=0.21] 


Epoch 1 Loss: 29.7816
Epoch 2/5


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.04it/s, loss=0.286]


Epoch 2 Loss: 15.1864
Epoch 3/5


Training Epoch 3: 100%|██████████| 58/58 [00:28<00:00,  2.03it/s, loss=0.184] 


Epoch 3 Loss: 10.2595
Epoch 4/5


Training Epoch 4: 100%|██████████| 58/58 [00:27<00:00,  2.11it/s, loss=0.0748]


Epoch 4 Loss: 7.0926
Epoch 5/5


Training Epoch 5: 100%|██████████| 58/58 [00:28<00:00,  2.07it/s, loss=0.0553]


Epoch 5 Loss: 5.5873
Test Metrics: Precision=0.9387, Recall=0.9387, F1=0.9387

Fine-tuning roberta-large (large) with Train Size 465, Split 1...


Map: 100%|██████████| 465/465 [00:00<00:00, 8071.41 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7679.76 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 8395.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:25<00:00,  2.28it/s, loss=0.387]


Epoch 1 Loss: 29.1732
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:25<00:00,  2.34it/s, loss=0.228]


Epoch 2 Loss: 14.3858
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:25<00:00,  2.30it/s, loss=0.125] 


Epoch 3 Loss: 9.7810
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:26<00:00,  2.27it/s, loss=0.166] 


Epoch 4 Loss: 6.7742
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:25<00:00,  2.28it/s, loss=0.0394]


Epoch 5 Loss: 4.7902
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 465, Split 2...


Map: 100%|██████████| 465/465 [00:00<00:00, 8841.44 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7884.67 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9217.55 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:27<00:00,  2.18it/s, loss=0.468]


Epoch 1 Loss: 31.8551
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:26<00:00,  2.22it/s, loss=0.211]


Epoch 2 Loss: 16.1902
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.18it/s, loss=0.367] 


Epoch 3 Loss: 11.3261
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:26<00:00,  2.20it/s, loss=0.0337]


Epoch 4 Loss: 9.2938
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:26<00:00,  2.20it/s, loss=0.134] 


Epoch 5 Loss: 6.9262
Test Metrics: Precision=0.9279, Recall=0.9279, F1=0.9279

Fine-tuning roberta-large (large) with Train Size 465, Split 3...


Map: 100%|██████████| 465/465 [00:00<00:00, 7499.71 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7898.72 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9333.75 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s, loss=0.302]


Epoch 1 Loss: 27.4995
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s, loss=0.37] 


Epoch 2 Loss: 16.0785
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.11it/s, loss=0.217] 


Epoch 3 Loss: 10.7006
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s, loss=1.63]  


Epoch 4 Loss: 8.5271
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:28<00:00,  2.09it/s, loss=0.838] 


Epoch 5 Loss: 6.6455
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning roberta-large (large) with Train Size 465, Split 4...


Map: 100%|██████████| 465/465 [00:00<00:00, 8240.25 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 8248.82 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 8988.88 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.09it/s, loss=0.0867]


Epoch 1 Loss: 28.8197
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.346]


Epoch 2 Loss: 14.0106
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:26<00:00,  2.25it/s, loss=0.00431]


Epoch 3 Loss: 9.7205
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:26<00:00,  2.22it/s, loss=0.0484]


Epoch 4 Loss: 6.9005
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s, loss=0.102] 


Epoch 5 Loss: 4.8909
Test Metrics: Precision=0.9344, Recall=0.9344, F1=0.9344

Fine-tuning roberta-large (large) with Train Size 465, Split 5...


Map: 100%|██████████| 465/465 [00:00<00:00, 8601.33 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7360.37 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9393.04 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.05it/s, loss=0.347]


Epoch 1 Loss: 30.0124
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:28<00:00,  2.09it/s, loss=0.218]


Epoch 2 Loss: 15.0655
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, loss=0.246] 


Epoch 3 Loss: 10.1656
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s, loss=0.0084]


Epoch 4 Loss: 6.9750
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s, loss=0.000976]


Epoch 5 Loss: 5.0099
Test Metrics: Precision=0.9403, Recall=0.9403, F1=0.9403

Fine-tuning roberta-large (large) with Train Size 470, Split 1...


Map: 100%|██████████| 470/470 [00:00<00:00, 8390.00 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7163.24 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9068.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:26<00:00,  2.27it/s, loss=0.292]


Epoch 1 Loss: 29.9521
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:26<00:00,  2.20it/s, loss=0.167]


Epoch 2 Loss: 13.7327
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:25<00:00,  2.27it/s, loss=0.074] 


Epoch 3 Loss: 9.4221
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.18it/s, loss=0.118] 


Epoch 4 Loss: 6.6819
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:25<00:00,  2.28it/s, loss=0.0615]


Epoch 5 Loss: 4.7096
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 470, Split 2...


Map: 100%|██████████| 470/470 [00:00<00:00, 8664.20 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7165.97 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9157.75 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s, loss=0.264]


Epoch 1 Loss: 26.7055
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.17it/s, loss=0.254]


Epoch 2 Loss: 12.4337
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.14it/s, loss=0.143] 


Epoch 3 Loss: 8.5569
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.16it/s, loss=0.17]  


Epoch 4 Loss: 5.6399
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:27<00:00,  2.17it/s, loss=0.0874]


Epoch 5 Loss: 3.7124
Test Metrics: Precision=0.9371, Recall=0.9371, F1=0.9371

Fine-tuning roberta-large (large) with Train Size 470, Split 3...


Map: 100%|██████████| 470/470 [00:00<00:00, 8490.38 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 8393.25 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9608.46 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.04it/s, loss=0.28] 


Epoch 1 Loss: 30.1664
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s, loss=0.147]


Epoch 2 Loss: 14.2803
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:28<00:00,  2.10it/s, loss=0.139] 


Epoch 3 Loss: 9.7497
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s, loss=0.133] 


Epoch 4 Loss: 6.5910
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, loss=0.0502]


Epoch 5 Loss: 4.5343
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 470, Split 4...


Map: 100%|██████████| 470/470 [00:00<00:00, 8675.64 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7639.01 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9158.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:26<00:00,  2.21it/s, loss=0.251]


Epoch 1 Loss: 28.9189
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:27<00:00,  2.15it/s, loss=0.168] 


Epoch 2 Loss: 12.9667
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:27<00:00,  2.17it/s, loss=0.204] 


Epoch 3 Loss: 8.9925
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:27<00:00,  2.18it/s, loss=0.0419]


Epoch 4 Loss: 6.3472
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:27<00:00,  2.13it/s, loss=0.088] 


Epoch 5 Loss: 4.4534
Test Metrics: Precision=0.9326, Recall=0.9326, F1=0.9326

Fine-tuning roberta-large (large) with Train Size 470, Split 5...


Map: 100%|██████████| 470/470 [00:00<00:00, 8687.65 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7544.58 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9422.24 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, loss=0.569]


Epoch 1 Loss: 38.1551
Epoch 2/5


Training Epoch 2: 100%|██████████| 59/59 [00:28<00:00,  2.07it/s, loss=0.498]


Epoch 2 Loss: 31.2048
Epoch 3/5


Training Epoch 3: 100%|██████████| 59/59 [00:28<00:00,  2.06it/s, loss=0.215]


Epoch 3 Loss: 18.2669
Epoch 4/5


Training Epoch 4: 100%|██████████| 59/59 [00:28<00:00,  2.04it/s, loss=0.279]


Epoch 4 Loss: 14.5665
Epoch 5/5


Training Epoch 5: 100%|██████████| 59/59 [00:28<00:00,  2.08it/s, loss=0.146]


Epoch 5 Loss: 13.8814
Test Metrics: Precision=0.9221, Recall=0.9221, F1=0.9221

Fine-tuning roberta-large (large) with Train Size 475, Split 1...


Map: 100%|██████████| 475/475 [00:00<00:00, 8496.54 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 6775.82 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9182.23 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:25<00:00,  2.33it/s, loss=0.169]


Epoch 1 Loss: 26.8645
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:26<00:00,  2.28it/s, loss=0.467]


Epoch 2 Loss: 13.1963
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:25<00:00,  2.31it/s, loss=0.112] 


Epoch 3 Loss: 8.1700
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:26<00:00,  2.25it/s, loss=0.192] 


Epoch 4 Loss: 5.9788
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:26<00:00,  2.29it/s, loss=0.0606]


Epoch 5 Loss: 4.0868
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 475, Split 2...


Map: 100%|██████████| 475/475 [00:00<00:00, 8344.09 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7292.44 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9090.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:26<00:00,  2.25it/s, loss=0.344]


Epoch 1 Loss: 28.4364
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.19it/s, loss=0.172] 


Epoch 2 Loss: 13.6246
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:27<00:00,  2.21it/s, loss=0.0548]


Epoch 3 Loss: 9.7586
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:27<00:00,  2.16it/s, loss=0.189] 


Epoch 4 Loss: 6.2981
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:27<00:00,  2.22it/s, loss=0.0217]


Epoch 5 Loss: 4.9607
Test Metrics: Precision=0.9355, Recall=0.9355, F1=0.9355

Fine-tuning roberta-large (large) with Train Size 475, Split 3...


Map: 100%|██████████| 475/475 [00:00<00:00, 7987.32 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7503.37 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9347.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.13it/s, loss=0.28]  


Epoch 1 Loss: 28.1476
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.218]


Epoch 2 Loss: 13.5292
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:27<00:00,  2.16it/s, loss=0.112] 


Epoch 3 Loss: 9.4993
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.08it/s, loss=0.14]  


Epoch 4 Loss: 6.3553
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:28<00:00,  2.12it/s, loss=0.0368]


Epoch 5 Loss: 4.7834
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 475, Split 4...


Map: 100%|██████████| 475/475 [00:00<00:00, 8349.34 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7282.18 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9595.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.16it/s, loss=0.198]


Epoch 1 Loss: 26.9464
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.22it/s, loss=0.241]


Epoch 2 Loss: 13.7187
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:27<00:00,  2.18it/s, loss=0.126] 


Epoch 3 Loss: 8.7459
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:27<00:00,  2.17it/s, loss=0.0994]


Epoch 4 Loss: 6.5659
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:27<00:00,  2.15it/s, loss=0.263] 


Epoch 5 Loss: 4.7746
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 475, Split 5...


Map: 100%|██████████| 475/475 [00:00<00:00, 8366.34 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 6959.37 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9564.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.08it/s, loss=0.465]


Epoch 1 Loss: 31.0532
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.122]


Epoch 2 Loss: 16.0075
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.18]  


Epoch 3 Loss: 10.7118
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.168] 


Epoch 4 Loss: 7.2247
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.059] 


Epoch 5 Loss: 5.2720
Test Metrics: Precision=0.9377, Recall=0.9377, F1=0.9377

Fine-tuning roberta-large (large) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 8642.85 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7109.12 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9098.54 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.22it/s, loss=0.52] 


Epoch 1 Loss: 32.3258
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:26<00:00,  2.30it/s, loss=0.364]


Epoch 2 Loss: 21.6414
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:27<00:00,  2.18it/s, loss=0.21]  


Epoch 3 Loss: 11.1716
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:27<00:00,  2.21it/s, loss=0.0843]


Epoch 4 Loss: 7.7994
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:27<00:00,  2.19it/s, loss=0.0831]


Epoch 5 Loss: 5.5328
Test Metrics: Precision=0.9325, Recall=0.9325, F1=0.9325

Fine-tuning roberta-large (large) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 7839.09 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7330.56 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 8943.25 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.17it/s, loss=0.34] 


Epoch 1 Loss: 26.0926
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:26<00:00,  2.23it/s, loss=0.172] 


Epoch 2 Loss: 13.2894
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.14it/s, loss=0.0889]


Epoch 3 Loss: 8.1377
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.12it/s, loss=0.0779]


Epoch 4 Loss: 5.1205
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:27<00:00,  2.17it/s, loss=0.0606]


Epoch 5 Loss: 3.2642
Test Metrics: Precision=0.9388, Recall=0.9388, F1=0.9388

Fine-tuning roberta-large (large) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8568.29 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7115.90 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9539.45 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.10it/s, loss=0.333]


Epoch 1 Loss: 29.8472
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:30<00:00,  1.99it/s, loss=0.185]


Epoch 2 Loss: 13.1621
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.136] 


Epoch 3 Loss: 8.6613
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.07it/s, loss=0.0728]


Epoch 4 Loss: 5.8032
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:28<00:00,  2.12it/s, loss=0.0529]


Epoch 5 Loss: 4.2608
Test Metrics: Precision=0.9382, Recall=0.9382, F1=0.9382

Fine-tuning roberta-large (large) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 9003.19 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7469.82 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9444.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.12it/s, loss=0.253]


Epoch 1 Loss: 28.9940
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.201]


Epoch 2 Loss: 14.0690
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.12it/s, loss=0.143] 


Epoch 3 Loss: 9.6223
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:28<00:00,  2.11it/s, loss=0.108] 


Epoch 4 Loss: 6.9067
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:27<00:00,  2.20it/s, loss=0.0842]


Epoch 5 Loss: 5.0543
Test Metrics: Precision=0.9354, Recall=0.9354, F1=0.9354

Fine-tuning roberta-large (large) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8443.74 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7503.79 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9038.46 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.224]


Epoch 1 Loss: 28.2836
Epoch 2/5


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.06it/s, loss=0.317] 


Epoch 2 Loss: 13.5040
Epoch 3/5


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.0743]


Epoch 3 Loss: 8.7337
Epoch 4/5


Training Epoch 4: 100%|██████████| 60/60 [00:29<00:00,  2.06it/s, loss=0.0926]


Epoch 4 Loss: 5.2953
Epoch 5/5


Training Epoch 5: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.0342]


Epoch 5 Loss: 3.5638
Test Metrics: Precision=0.9424, Recall=0.9424, F1=0.9424

Fine-tuning roberta-large (large) with Train Size 485, Split 1...


Map: 100%|██████████| 485/485 [00:00<00:00, 8039.45 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7442.15 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9184.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 61/61 [00:27<00:00,  2.25it/s, loss=0.242]


Epoch 1 Loss: 28.6040
Epoch 2/5


Training Epoch 2: 100%|██████████| 61/61 [00:26<00:00,  2.27it/s, loss=0.277]


Epoch 2 Loss: 13.0723
Epoch 3/5


Training Epoch 3: 100%|██████████| 61/61 [00:26<00:00,  2.31it/s, loss=0.0895]


Epoch 3 Loss: 8.9613
Epoch 4/5


Training Epoch 4: 100%|██████████| 61/61 [00:27<00:00,  2.26it/s, loss=0.157] 


Epoch 4 Loss: 6.9950
Epoch 5/5


Training Epoch 5: 100%|██████████| 61/61 [00:26<00:00,  2.27it/s, loss=0.0482]


Epoch 5 Loss: 4.2210
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 485, Split 2...


Map: 100%|██████████| 485/485 [00:00<00:00, 8638.10 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7341.30 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 8960.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 61/61 [00:28<00:00,  2.18it/s, loss=0.45] 


Epoch 1 Loss: 30.2143
Epoch 2/5


Training Epoch 2: 100%|██████████| 61/61 [00:28<00:00,  2.18it/s, loss=0.25] 


Epoch 2 Loss: 16.8576
Epoch 3/5


Training Epoch 3: 100%|██████████| 61/61 [00:27<00:00,  2.19it/s, loss=0.341] 


Epoch 3 Loss: 11.7771
Epoch 4/5


Training Epoch 4: 100%|██████████| 61/61 [00:28<00:00,  2.15it/s, loss=0.178] 


Epoch 4 Loss: 9.1785
Epoch 5/5


Training Epoch 5: 100%|██████████| 61/61 [00:28<00:00,  2.17it/s, loss=0.153] 


Epoch 5 Loss: 7.5687
Test Metrics: Precision=0.9304, Recall=0.9304, F1=0.9304

Fine-tuning roberta-large (large) with Train Size 485, Split 3...


Map: 100%|██████████| 485/485 [00:00<00:00, 7887.91 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 8177.35 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 3580.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 61/61 [00:29<00:00,  2.06it/s, loss=0.227]


Epoch 1 Loss: 26.7399
Epoch 2/5


Training Epoch 2: 100%|██████████| 61/61 [00:29<00:00,  2.07it/s, loss=0.297]


Epoch 2 Loss: 12.8126
Epoch 3/5


Training Epoch 3: 100%|██████████| 61/61 [00:28<00:00,  2.11it/s, loss=0.214] 


Epoch 3 Loss: 8.1357
Epoch 4/5


Training Epoch 4: 100%|██████████| 61/61 [00:29<00:00,  2.09it/s, loss=0.0493]


Epoch 4 Loss: 5.2967
Epoch 5/5


Training Epoch 5: 100%|██████████| 61/61 [00:29<00:00,  2.04it/s, loss=0.0796]


Epoch 5 Loss: 3.7355
Test Metrics: Precision=0.9391, Recall=0.9391, F1=0.9391

Fine-tuning roberta-large (large) with Train Size 485, Split 4...


Map: 100%|██████████| 485/485 [00:00<00:00, 8834.72 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7481.70 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9040.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 61/61 [00:29<00:00,  2.10it/s, loss=0.391]


Epoch 1 Loss: 28.7183
Epoch 2/5


Training Epoch 2: 100%|██████████| 61/61 [00:28<00:00,  2.14it/s, loss=0.215]


Epoch 2 Loss: 14.0483
Epoch 3/5


Training Epoch 3: 100%|██████████| 61/61 [00:28<00:00,  2.14it/s, loss=0.143] 


Epoch 3 Loss: 9.2037
Epoch 4/5


Training Epoch 4: 100%|██████████| 61/61 [00:28<00:00,  2.18it/s, loss=0.0812]


Epoch 4 Loss: 6.1740
Epoch 5/5


Training Epoch 5: 100%|██████████| 61/61 [00:27<00:00,  2.19it/s, loss=0.0819]


Epoch 5 Loss: 4.6432
Test Metrics: Precision=0.9350, Recall=0.9350, F1=0.9350

Fine-tuning roberta-large (large) with Train Size 485, Split 5...


Map: 100%|██████████| 485/485 [00:00<00:00, 8740.95 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7104.15 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9525.10 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 61/61 [00:30<00:00,  2.01it/s, loss=0.275]


Epoch 1 Loss: 28.3424
Epoch 2/5


Training Epoch 2: 100%|██████████| 61/61 [00:29<00:00,  2.08it/s, loss=0.436]


Epoch 2 Loss: 14.2232
Epoch 3/5


Training Epoch 3: 100%|██████████| 61/61 [00:29<00:00,  2.08it/s, loss=0.109] 


Epoch 3 Loss: 8.7630
Epoch 4/5


Training Epoch 4: 100%|██████████| 61/61 [00:30<00:00,  2.03it/s, loss=0.101] 


Epoch 4 Loss: 5.7235
Epoch 5/5


Training Epoch 5: 100%|██████████| 61/61 [00:29<00:00,  2.05it/s, loss=0.0619]


Epoch 5 Loss: 3.9906
Test Metrics: Precision=0.9389, Recall=0.9389, F1=0.9389

Fine-tuning roberta-large (large) with Train Size 490, Split 1...


Map: 100%|██████████| 490/490 [00:00<00:00, 8648.56 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7212.27 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9311.93 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:27<00:00,  2.27it/s, loss=0.232]


Epoch 1 Loss: 30.2229
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:27<00:00,  2.26it/s, loss=0.28] 


Epoch 2 Loss: 15.9325
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:27<00:00,  2.24it/s, loss=0.127] 


Epoch 3 Loss: 10.9549
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:27<00:00,  2.25it/s, loss=0.118] 


Epoch 4 Loss: 7.8903
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:26<00:00,  2.31it/s, loss=0.135] 


Epoch 5 Loss: 5.8591
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning roberta-large (large) with Train Size 490, Split 2...


Map: 100%|██████████| 490/490 [00:00<00:00, 8921.77 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7614.28 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9148.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.18it/s, loss=0.0943]


Epoch 1 Loss: 28.6818
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:28<00:00,  2.16it/s, loss=0.304]


Epoch 2 Loss: 13.9811
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:28<00:00,  2.21it/s, loss=0.118] 


Epoch 3 Loss: 9.4775
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:28<00:00,  2.20it/s, loss=0.158] 


Epoch 4 Loss: 6.2980
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:28<00:00,  2.21it/s, loss=0.00921]


Epoch 5 Loss: 4.7183
Test Metrics: Precision=0.9357, Recall=0.9357, F1=0.9357

Fine-tuning roberta-large (large) with Train Size 490, Split 3...


Map: 100%|██████████| 490/490 [00:00<00:00, 8379.78 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8392.72 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9327.55 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.272]


Epoch 1 Loss: 29.1209
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.10it/s, loss=0.192]


Epoch 2 Loss: 13.9491
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.11it/s, loss=0.193] 


Epoch 3 Loss: 9.5177
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.102] 


Epoch 4 Loss: 7.0105
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.0709]


Epoch 5 Loss: 4.4480
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 490, Split 4...


Map: 100%|██████████| 490/490 [00:00<00:00, 8533.33 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8312.44 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9153.24 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.19it/s, loss=0.476]


Epoch 1 Loss: 31.1125
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:28<00:00,  2.14it/s, loss=0.163] 


Epoch 2 Loss: 16.0199
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.124] 


Epoch 3 Loss: 10.9841
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:26<00:00,  2.31it/s, loss=0.136] 


Epoch 4 Loss: 8.0665
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:28<00:00,  2.16it/s, loss=0.063] 


Epoch 5 Loss: 6.5682
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning roberta-large (large) with Train Size 490, Split 5...


Map: 100%|██████████| 490/490 [00:00<00:00, 8341.15 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7755.21 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9357.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.14it/s, loss=0.258]


Epoch 1 Loss: 32.8495
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.10it/s, loss=0.124]


Epoch 2 Loss: 15.4301
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.125] 


Epoch 3 Loss: 10.2272
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:30<00:00,  2.02it/s, loss=0.0577]


Epoch 4 Loss: 6.9335
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.137] 


Epoch 5 Loss: 4.9513
Test Metrics: Precision=0.9386, Recall=0.9386, F1=0.9386

Fine-tuning roberta-large (large) with Train Size 495, Split 1...


Map: 100%|██████████| 495/495 [00:00<00:00, 8715.57 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7477.42 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9042.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:26<00:00,  2.30it/s, loss=0.207]


Epoch 1 Loss: 26.9231
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:27<00:00,  2.25it/s, loss=0.212]


Epoch 2 Loss: 13.6492
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:27<00:00,  2.25it/s, loss=0.254] 


Epoch 3 Loss: 9.2314
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:27<00:00,  2.28it/s, loss=0.0577]


Epoch 4 Loss: 6.0020
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:28<00:00,  2.21it/s, loss=0.0562]


Epoch 5 Loss: 4.1019
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning roberta-large (large) with Train Size 495, Split 2...


Map: 100%|██████████| 495/495 [00:00<00:00, 8967.10 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7812.97 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 8830.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.17it/s, loss=0.129]


Epoch 1 Loss: 26.5788
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:28<00:00,  2.19it/s, loss=0.246] 


Epoch 2 Loss: 13.3300
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:28<00:00,  2.20it/s, loss=0.176] 


Epoch 3 Loss: 8.9033
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:28<00:00,  2.19it/s, loss=0.0841]


Epoch 4 Loss: 5.5194
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:27<00:00,  2.22it/s, loss=0.0887]


Epoch 5 Loss: 3.8156
Test Metrics: Precision=0.9368, Recall=0.9368, F1=0.9368

Fine-tuning roberta-large (large) with Train Size 495, Split 3...


Map: 100%|██████████| 495/495 [00:00<00:00, 7989.52 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7628.39 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9210.79 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.296]


Epoch 1 Loss: 30.9672
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:30<00:00,  2.05it/s, loss=0.277]


Epoch 2 Loss: 14.6778
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.23]  


Epoch 3 Loss: 9.7066
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:29<00:00,  2.13it/s, loss=0.15]  


Epoch 4 Loss: 6.5811
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:29<00:00,  2.08it/s, loss=0.0774]


Epoch 5 Loss: 4.7881
Test Metrics: Precision=0.9348, Recall=0.9348, F1=0.9348

Fine-tuning roberta-large (large) with Train Size 495, Split 4...


Map: 100%|██████████| 495/495 [00:00<00:00, 8578.37 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7696.83 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9253.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:28<00:00,  2.16it/s, loss=0.245]


Epoch 1 Loss: 29.0745
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:29<00:00,  2.12it/s, loss=0.307]


Epoch 2 Loss: 14.0478
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:28<00:00,  2.18it/s, loss=0.0911]


Epoch 3 Loss: 9.2296
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:28<00:00,  2.18it/s, loss=0.0823]


Epoch 4 Loss: 6.3433
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:28<00:00,  2.19it/s, loss=0.114] 


Epoch 5 Loss: 4.5060
Test Metrics: Precision=0.9359, Recall=0.9359, F1=0.9359

Fine-tuning roberta-large (large) with Train Size 495, Split 5...


Map: 100%|██████████| 495/495 [00:00<00:00, 8075.88 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7617.47 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9154.42 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 62/62 [00:30<00:00,  2.01it/s, loss=0.233]


Epoch 1 Loss: 29.9347
Epoch 2/5


Training Epoch 2: 100%|██████████| 62/62 [00:30<00:00,  2.06it/s, loss=0.165]


Epoch 2 Loss: 13.6394
Epoch 3/5


Training Epoch 3: 100%|██████████| 62/62 [00:29<00:00,  2.07it/s, loss=0.275] 


Epoch 3 Loss: 9.2723
Epoch 4/5


Training Epoch 4: 100%|██████████| 62/62 [00:29<00:00,  2.09it/s, loss=0.118] 


Epoch 4 Loss: 6.0169
Epoch 5/5


Training Epoch 5: 100%|██████████| 62/62 [00:30<00:00,  2.02it/s, loss=0.112] 


Epoch 5 Loss: 4.2183
Test Metrics: Precision=0.9417, Recall=0.9417, F1=0.9417

Fine-tuning roberta-large (large) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 8481.15 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7364.50 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9072.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 63/63 [00:27<00:00,  2.29it/s, loss=0.657]


Epoch 1 Loss: 36.6950
Epoch 2/5


Training Epoch 2: 100%|██████████| 63/63 [00:27<00:00,  2.29it/s, loss=0.28] 


Epoch 2 Loss: 18.8353
Epoch 3/5


Training Epoch 3: 100%|██████████| 63/63 [00:27<00:00,  2.30it/s, loss=0.0666]


Epoch 3 Loss: 13.1180
Epoch 4/5


Training Epoch 4: 100%|██████████| 63/63 [00:27<00:00,  2.29it/s, loss=0.175] 


Epoch 4 Loss: 9.6055
Epoch 5/5


Training Epoch 5: 100%|██████████| 63/63 [00:27<00:00,  2.28it/s, loss=0.118] 


Epoch 5 Loss: 7.6197
Test Metrics: Precision=0.9265, Recall=0.9265, F1=0.9265

Fine-tuning roberta-large (large) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 8429.88 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7763.78 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8968.58 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 63/63 [00:28<00:00,  2.18it/s, loss=0.358]


Epoch 1 Loss: 32.2347
Epoch 2/5


Training Epoch 2: 100%|██████████| 63/63 [00:28<00:00,  2.22it/s, loss=0.288]


Epoch 2 Loss: 14.3695
Epoch 3/5


Training Epoch 3: 100%|██████████| 63/63 [00:28<00:00,  2.25it/s, loss=0.163] 


Epoch 3 Loss: 9.9625
Epoch 4/5


Training Epoch 4: 100%|██████████| 63/63 [00:28<00:00,  2.19it/s, loss=0.0354]


Epoch 4 Loss: 6.2623
Epoch 5/5


Training Epoch 5: 100%|██████████| 63/63 [00:28<00:00,  2.20it/s, loss=0.0351]


Epoch 5 Loss: 4.3831
Test Metrics: Precision=0.9369, Recall=0.9369, F1=0.9369

Fine-tuning roberta-large (large) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 8001.28 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8118.58 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9447.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.213]


Epoch 1 Loss: 27.9661
Epoch 2/5


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.09it/s, loss=0.255]


Epoch 2 Loss: 13.7740
Epoch 3/5


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.09it/s, loss=0.167] 


Epoch 3 Loss: 9.0669
Epoch 4/5


Training Epoch 4: 100%|██████████| 63/63 [00:30<00:00,  2.05it/s, loss=0.0569]


Epoch 4 Loss: 5.7718
Epoch 5/5


Training Epoch 5: 100%|██████████| 63/63 [00:29<00:00,  2.12it/s, loss=0.0364]


Epoch 5 Loss: 4.2027
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning roberta-large (large) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 8728.17 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7380.57 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8975.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 63/63 [00:28<00:00,  2.17it/s, loss=0.209]


Epoch 1 Loss: 29.2752
Epoch 2/5


Training Epoch 2: 100%|██████████| 63/63 [00:28<00:00,  2.21it/s, loss=0.115] 


Epoch 2 Loss: 13.6193
Epoch 3/5


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.17it/s, loss=0.167] 


Epoch 3 Loss: 10.0751
Epoch 4/5


Training Epoch 4: 100%|██████████| 63/63 [00:29<00:00,  2.10it/s, loss=0.0943]


Epoch 4 Loss: 6.9035
Epoch 5/5


Training Epoch 5: 100%|██████████| 63/63 [00:28<00:00,  2.19it/s, loss=0.142] 


Epoch 5 Loss: 5.1557
Test Metrics: Precision=0.9356, Recall=0.9356, F1=0.9356

Fine-tuning roberta-large (large) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 8894.38 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8260.08 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9008.89 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5


Training Epoch 1: 100%|██████████| 63/63 [00:29<00:00,  2.11it/s, loss=0.124]


Epoch 1 Loss: 28.4851
Epoch 2/5


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.04it/s, loss=0.269] 


Epoch 2 Loss: 14.3291
Epoch 3/5


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.09it/s, loss=0.0345]


Epoch 3 Loss: 9.2853
Epoch 4/5


Training Epoch 4: 100%|██████████| 63/63 [00:31<00:00,  2.00it/s, loss=0.0735]


Epoch 4 Loss: 6.2381
Epoch 5/5


Training Epoch 5: 100%|██████████| 63/63 [00:30<00:00,  2.04it/s, loss=0.0638]


Epoch 5 Loss: 4.3945
Test Metrics: Precision=0.9398, Recall=0.9398, F1=0.9398
Results saved to Experiments_epoch5_roberta.xlsx


In [15]:
# Checkpoints to compare, keyed by the size label that appears in the
# "Fine-tuning <checkpoint> (<label>) ..." lines of the training log.
models = dict(
    small="bert-base-cased",
    medium="bert-large-cased",
    large="roberta-large",
)

# Run the fine-tuning sweep over every checkpoint above, using 10 splits per
# train size and train sizes stepped by 20 between 360 and 500
# (presumably inclusive of end_size -- confirm in iterate_and_finetune_with_torch).
# NOTE(review): "graphh" in the file name looks like a typo; it is kept as-is
# because it determines the output path and later cells may load the file by name.
iterate_and_finetune_with_torch(
    dataset=dataset,
    file_name='Experiments_moreksplits10_lesssteps20_for_smoother_graphh.xlsx',
    models=models,
    start_size=360,
    end_size=500,
    step_size=20,
    k_splits=10,
)


Fine-tuning bert-base-cased (small) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 8564.61 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7287.58 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9336.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  6.01it/s, loss=0.439]


Epoch 1 Loss: 31.2289
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  6.29it/s, loss=0.305]


Epoch 2 Loss: 15.9895
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  6.29it/s, loss=0.186]


Epoch 3 Loss: 11.5540
Test Metrics: Precision=0.9055, Recall=0.9055, F1=0.9055

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 9307.46 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6697.05 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9470.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.08it/s, loss=0.482]


Epoch 1 Loss: 26.4418
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  2.04it/s, loss=0.233]


Epoch 2 Loss: 13.3432
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.11it/s, loss=0.161] 


Epoch 3 Loss: 10.1260
Test Metrics: Precision=0.9128, Recall=0.9128, F1=0.9128

Fine-tuning roberta-large (large) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 7691.27 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6921.14 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9762.18 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.22it/s, loss=0.407]


Epoch 1 Loss: 23.1315
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:19<00:00,  2.27it/s, loss=0.338]


Epoch 2 Loss: 12.5471
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.22it/s, loss=0.205] 


Epoch 3 Loss: 8.3056
Test Metrics: Precision=0.9302, Recall=0.9302, F1=0.9302

Fine-tuning bert-base-cased (small) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 9278.52 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7139.07 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9529.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  6.15it/s, loss=0.38] 


Epoch 1 Loss: 28.9351
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  6.04it/s, loss=0.359]


Epoch 2 Loss: 15.0023
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  5.99it/s, loss=0.329]


Epoch 3 Loss: 10.7698
Test Metrics: Precision=0.9048, Recall=0.9048, F1=0.9048

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 9357.88 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7419.17 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9490.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:23<00:00,  1.93it/s, loss=0.275]


Epoch 1 Loss: 25.6538
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  1.97it/s, loss=0.335]


Epoch 2 Loss: 12.3011
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:23<00:00,  1.93it/s, loss=0.0839]


Epoch 3 Loss: 8.3352
Test Metrics: Precision=0.9144, Recall=0.9144, F1=0.9144

Fine-tuning roberta-large (large) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 9324.82 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6978.71 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9642.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.474]


Epoch 1 Loss: 22.9761
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.14it/s, loss=0.386]


Epoch 2 Loss: 12.5611
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.13it/s, loss=0.15]  


Epoch 3 Loss: 8.8770
Test Metrics: Precision=0.9264, Recall=0.9264, F1=0.9264

Fine-tuning bert-base-cased (small) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 8807.76 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7620.62 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9317.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  6.12it/s, loss=0.54] 


Epoch 1 Loss: 31.1288
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  5.98it/s, loss=0.314]


Epoch 2 Loss: 15.2919
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  6.03it/s, loss=0.281]


Epoch 3 Loss: 11.3931
Test Metrics: Precision=0.9036, Recall=0.9036, F1=0.9036

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 9158.98 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7717.21 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9268.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.01it/s, loss=0.269]


Epoch 1 Loss: 25.8159
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.06it/s, loss=0.218]


Epoch 2 Loss: 12.5086
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:23<00:00,  1.95it/s, loss=0.157] 


Epoch 3 Loss: 8.3061
Test Metrics: Precision=0.9140, Recall=0.9140, F1=0.9140

Fine-tuning roberta-large (large) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 9433.12 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7706.77 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9641.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.13it/s, loss=0.46] 


Epoch 1 Loss: 27.0083
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.398]


Epoch 2 Loss: 13.2893
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.136] 


Epoch 3 Loss: 10.1239
Test Metrics: Precision=0.9241, Recall=0.9241, F1=0.9241

Fine-tuning bert-base-cased (small) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 9148.88 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7259.37 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9406.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  5.89it/s, loss=0.295]


Epoch 1 Loss: 30.1929
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  6.04it/s, loss=0.482]


Epoch 2 Loss: 14.6972
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  5.92it/s, loss=0.342]


Epoch 3 Loss: 10.6722
Test Metrics: Precision=0.9027, Recall=0.9027, F1=0.9027

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 9231.10 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7293.56 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9310.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.05it/s, loss=0.31] 


Epoch 1 Loss: 27.5380
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:23<00:00,  1.95it/s, loss=0.241]


Epoch 2 Loss: 13.7974
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:23<00:00,  1.92it/s, loss=0.152]


Epoch 3 Loss: 9.7635
Test Metrics: Precision=0.9118, Recall=0.9118, F1=0.9118

Fine-tuning roberta-large (large) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 9334.97 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7345.01 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9475.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.18it/s, loss=0.325]


Epoch 1 Loss: 26.1188
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.205]


Epoch 2 Loss: 11.8281
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.09it/s, loss=0.107] 


Epoch 3 Loss: 8.8584
Test Metrics: Precision=0.9240, Recall=0.9240, F1=0.9240

Fine-tuning bert-base-cased (small) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 9087.43 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7634.88 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9470.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  5.69it/s, loss=0.576]


Epoch 1 Loss: 32.0824
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  5.77it/s, loss=0.245]


Epoch 2 Loss: 16.4259
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  5.71it/s, loss=0.254]


Epoch 3 Loss: 11.4743
Test Metrics: Precision=0.9057, Recall=0.9057, F1=0.9057

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 9038.04 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7452.12 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9466.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:24<00:00,  1.84it/s, loss=0.549]


Epoch 1 Loss: 28.3473
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:23<00:00,  1.88it/s, loss=0.386]


Epoch 2 Loss: 14.4781
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  1.99it/s, loss=0.243]


Epoch 3 Loss: 10.0947
Test Metrics: Precision=0.9133, Recall=0.9133, F1=0.9133

Fine-tuning roberta-large (large) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 8773.42 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7563.74 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9653.64 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:23<00:00,  1.95it/s, loss=0.287]


Epoch 1 Loss: 26.1537
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  2.00it/s, loss=0.188]


Epoch 2 Loss: 13.2822
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  1.97it/s, loss=0.199] 


Epoch 3 Loss: 9.4408
Test Metrics: Precision=0.9275, Recall=0.9275, F1=0.9275

Fine-tuning bert-base-cased (small) with Train Size 360, Split 6...


Map: 100%|██████████| 360/360 [00:00<00:00, 9054.90 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6754.57 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9183.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  5.84it/s, loss=0.284]


Epoch 1 Loss: 29.7831
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  6.02it/s, loss=0.185]


Epoch 2 Loss: 15.4310
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  5.85it/s, loss=0.175]


Epoch 3 Loss: 11.5585
Test Metrics: Precision=0.9003, Recall=0.9003, F1=0.9003

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 6...


Map: 100%|██████████| 360/360 [00:00<00:00, 9081.47 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6884.84 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9431.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  1.98it/s, loss=0.557]


Epoch 1 Loss: 25.2126
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:23<00:00,  1.93it/s, loss=0.238]


Epoch 2 Loss: 12.8802
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  1.99it/s, loss=0.0966]


Epoch 3 Loss: 8.7151
Test Metrics: Precision=0.9130, Recall=0.9130, F1=0.9130

Fine-tuning roberta-large (large) with Train Size 360, Split 6...


Map: 100%|██████████| 360/360 [00:00<00:00, 9055.55 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6805.71 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 3787.30 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:20<00:00,  2.16it/s, loss=0.327]


Epoch 1 Loss: 24.3154
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.203]


Epoch 2 Loss: 11.5544
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.14it/s, loss=0.289] 


Epoch 3 Loss: 8.1582
Test Metrics: Precision=0.9260, Recall=0.9260, F1=0.9260

Fine-tuning bert-base-cased (small) with Train Size 360, Split 7...


Map: 100%|██████████| 360/360 [00:00<00:00, 8989.83 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8334.20 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9141.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  6.06it/s, loss=0.437]


Epoch 1 Loss: 29.4391
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  5.92it/s, loss=0.245]


Epoch 2 Loss: 14.8429
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  6.19it/s, loss=0.145]


Epoch 3 Loss: 10.8072
Test Metrics: Precision=0.9074, Recall=0.9074, F1=0.9074

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 7...


Map: 100%|██████████| 360/360 [00:00<00:00, 9023.94 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8570.25 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9265.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.03it/s, loss=0.32] 


Epoch 1 Loss: 27.6233
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  1.99it/s, loss=0.182]


Epoch 2 Loss: 13.9231
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  2.00it/s, loss=0.185] 


Epoch 3 Loss: 9.9410
Test Metrics: Precision=0.9144, Recall=0.9144, F1=0.9144

Fine-tuning roberta-large (large) with Train Size 360, Split 7...


Map: 100%|██████████| 360/360 [00:00<00:00, 9085.46 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8230.85 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9455.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.10it/s, loss=0.163]


Epoch 1 Loss: 23.8269
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.15it/s, loss=0.313]


Epoch 2 Loss: 11.4592
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:20<00:00,  2.17it/s, loss=0.375] 


Epoch 3 Loss: 8.1298
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning bert-base-cased (small) with Train Size 360, Split 8...


Map: 100%|██████████| 360/360 [00:00<00:00, 8868.02 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7367.22 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 3735.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  5.72it/s, loss=0.46] 


Epoch 1 Loss: 32.7454
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  5.77it/s, loss=0.412]


Epoch 2 Loss: 16.4873
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:08<00:00,  5.60it/s, loss=0.189]


Epoch 3 Loss: 11.9963
Test Metrics: Precision=0.9021, Recall=0.9021, F1=0.9021

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 8...


Map: 100%|██████████| 360/360 [00:00<00:00, 8744.71 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7405.71 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9413.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:24<00:00,  1.86it/s, loss=0.343]


Epoch 1 Loss: 27.3744
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:24<00:00,  1.85it/s, loss=0.201]


Epoch 2 Loss: 13.9178
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:24<00:00,  1.81it/s, loss=0.133] 


Epoch 3 Loss: 10.2333
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning roberta-large (large) with Train Size 360, Split 8...


Map: 100%|██████████| 360/360 [00:00<00:00, 8835.59 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7185.10 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9635.43 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.02it/s, loss=0.291]


Epoch 1 Loss: 23.6320
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  1.96it/s, loss=0.227]


Epoch 2 Loss: 10.6561
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:23<00:00,  1.93it/s, loss=0.0784]


Epoch 3 Loss: 7.8362
Test Metrics: Precision=0.9312, Recall=0.9312, F1=0.9312

Fine-tuning bert-base-cased (small) with Train Size 360, Split 9...


Map: 100%|██████████| 360/360 [00:00<00:00, 8993.74 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8234.67 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9013.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  5.81it/s, loss=0.335]


Epoch 1 Loss: 28.0512
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  5.86it/s, loss=0.267]


Epoch 2 Loss: 14.3648
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  6.03it/s, loss=0.201]


Epoch 3 Loss: 10.8512
Test Metrics: Precision=0.9011, Recall=0.9011, F1=0.9011

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 9...


Map: 100%|██████████| 360/360 [00:00<00:00, 8911.31 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7861.25 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9124.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  1.97it/s, loss=0.335]


Epoch 1 Loss: 26.7762
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:23<00:00,  1.90it/s, loss=0.211]


Epoch 2 Loss: 14.0109
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:23<00:00,  1.95it/s, loss=0.165]


Epoch 3 Loss: 10.4012
Test Metrics: Precision=0.9073, Recall=0.9073, F1=0.9073

Fine-tuning roberta-large (large) with Train Size 360, Split 9...


Map: 100%|██████████| 360/360 [00:00<00:00, 8978.82 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7457.28 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9272.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:21<00:00,  2.12it/s, loss=0.228]


Epoch 1 Loss: 21.4407
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:20<00:00,  2.15it/s, loss=0.193] 


Epoch 2 Loss: 9.9384
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:21<00:00,  2.10it/s, loss=0.104] 


Epoch 3 Loss: 6.2250
Test Metrics: Precision=0.9325, Recall=0.9325, F1=0.9325

Fine-tuning bert-base-cased (small) with Train Size 360, Split 10...


Map: 100%|██████████| 360/360 [00:00<00:00, 8659.95 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7752.47 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9249.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:07<00:00,  5.70it/s, loss=0.324]


Epoch 1 Loss: 28.8352
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:07<00:00,  5.89it/s, loss=0.331]


Epoch 2 Loss: 14.6462
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:07<00:00,  5.75it/s, loss=0.236]


Epoch 3 Loss: 10.9444
Test Metrics: Precision=0.9063, Recall=0.9063, F1=0.9063

Fine-tuning bert-large-cased (medium) with Train Size 360, Split 10...


Map: 100%|██████████| 360/360 [00:00<00:00, 8371.82 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7920.84 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9155.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:23<00:00,  1.92it/s, loss=0.552]


Epoch 1 Loss: 26.5877
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:24<00:00,  1.85it/s, loss=0.404]


Epoch 2 Loss: 12.1396
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:23<00:00,  1.91it/s, loss=0.0995]


Epoch 3 Loss: 8.5137
Test Metrics: Precision=0.9180, Recall=0.9180, F1=0.9180

Fine-tuning roberta-large (large) with Train Size 360, Split 10...


Map: 100%|██████████| 360/360 [00:00<00:00, 8541.31 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8163.87 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9800.84 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:22<00:00,  2.02it/s, loss=0.501]


Epoch 1 Loss: 22.1302
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:22<00:00,  1.98it/s, loss=0.266] 


Epoch 2 Loss: 10.5176
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:22<00:00,  1.99it/s, loss=0.224] 


Epoch 3 Loss: 6.6164
Test Metrics: Precision=0.9321, Recall=0.9321, F1=0.9321

Fine-tuning bert-base-cased (small) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 9024.76 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7413.53 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9132.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.30it/s, loss=0.5]  


Epoch 1 Loss: 33.5185
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:07<00:00,  6.20it/s, loss=0.338]


Epoch 2 Loss: 16.9769
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.11it/s, loss=0.95] 


Epoch 3 Loss: 12.2411
Test Metrics: Precision=0.9044, Recall=0.9044, F1=0.9044

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 9221.82 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7713.48 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9498.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.03it/s, loss=0.445]


Epoch 1 Loss: 28.9523
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  1.97it/s, loss=0.242]


Epoch 2 Loss: 16.4270
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.99it/s, loss=0.294]


Epoch 3 Loss: 12.0468
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning roberta-large (large) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 9120.14 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7276.29 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9411.57 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:21<00:00,  2.21it/s, loss=0.204]


Epoch 1 Loss: 26.4174
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.12it/s, loss=0.224]


Epoch 2 Loss: 12.2502
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:21<00:00,  2.24it/s, loss=0.229] 


Epoch 3 Loss: 8.6684
Test Metrics: Precision=0.9278, Recall=0.9278, F1=0.9278

Fine-tuning bert-base-cased (small) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 8914.57 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7723.76 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9028.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.06it/s, loss=0.414]


Epoch 1 Loss: 30.6762
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:07<00:00,  6.14it/s, loss=0.306]


Epoch 2 Loss: 16.0769
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.15it/s, loss=0.164]


Epoch 3 Loss: 11.4212
Test Metrics: Precision=0.9053, Recall=0.9053, F1=0.9053

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 8986.09 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7756.83 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9107.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:24<00:00,  1.98it/s, loss=0.402]


Epoch 1 Loss: 27.8483
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  1.98it/s, loss=0.334]


Epoch 2 Loss: 14.6245
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.95it/s, loss=0.132] 


Epoch 3 Loss: 10.0877
Test Metrics: Precision=0.9101, Recall=0.9101, F1=0.9101

Fine-tuning roberta-large (large) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 9168.77 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7995.56 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9196.02 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.17it/s, loss=0.295]


Epoch 1 Loss: 26.7740
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.10it/s, loss=0.22]  


Epoch 2 Loss: 11.7591
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.14it/s, loss=0.192] 


Epoch 3 Loss: 7.9693
Test Metrics: Precision=0.9294, Recall=0.9294, F1=0.9294

Fine-tuning bert-base-cased (small) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 8654.29 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8093.00 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9058.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.07it/s, loss=0.598]


Epoch 1 Loss: 31.9749
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:07<00:00,  6.15it/s, loss=0.264]


Epoch 2 Loss: 15.4628
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.08it/s, loss=0.182]


Epoch 3 Loss: 11.3492
Test Metrics: Precision=0.9056, Recall=0.9056, F1=0.9056

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 9046.63 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8344.25 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9147.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:24<00:00,  1.95it/s, loss=0.994]


Epoch 1 Loss: 29.4128
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  1.99it/s, loss=0.286]


Epoch 2 Loss: 17.3489
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.99it/s, loss=0.317]


Epoch 3 Loss: 18.0922
Test Metrics: Precision=0.8700, Recall=0.8700, F1=0.8700

Fine-tuning roberta-large (large) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 9108.31 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8217.98 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9414.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.443]


Epoch 1 Loss: 23.8307
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.11it/s, loss=0.167]


Epoch 2 Loss: 12.3968
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.17it/s, loss=0.0894]


Epoch 3 Loss: 8.3744
Test Metrics: Precision=0.9304, Recall=0.9304, F1=0.9304

Fine-tuning bert-base-cased (small) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 9166.98 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7561.43 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 8798.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.22it/s, loss=0.565]


Epoch 1 Loss: 33.0349
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:07<00:00,  6.32it/s, loss=0.443]


Epoch 2 Loss: 16.2542
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.22it/s, loss=0.332]


Epoch 3 Loss: 11.8890
Test Metrics: Precision=0.9025, Recall=0.9025, F1=0.9025

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 8901.37 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7746.28 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 8971.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:24<00:00,  1.97it/s, loss=0.421]


Epoch 1 Loss: 27.0366
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  1.95it/s, loss=0.213]


Epoch 2 Loss: 13.1392
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.97it/s, loss=0.118] 


Epoch 3 Loss: 8.7838
Test Metrics: Precision=0.9145, Recall=0.9145, F1=0.9145

Fine-tuning roberta-large (large) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 9611.03 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7793.05 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9273.29 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.18it/s, loss=0.274]


Epoch 1 Loss: 26.2071
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.17it/s, loss=0.45] 


Epoch 2 Loss: 14.2185
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.17it/s, loss=0.13] 


Epoch 3 Loss: 10.4002
Test Metrics: Precision=0.9237, Recall=0.9237, F1=0.9237

Fine-tuning bert-base-cased (small) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 8642.47 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6709.90 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 3653.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:08<00:00,  5.87it/s, loss=0.543]


Epoch 1 Loss: 33.9436
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:08<00:00,  5.92it/s, loss=0.348]


Epoch 2 Loss: 15.5407
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:08<00:00,  5.92it/s, loss=0.279]


Epoch 3 Loss: 11.3461
Test Metrics: Precision=0.9097, Recall=0.9097, F1=0.9097

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 8921.80 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6801.82 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9257.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:25<00:00,  1.87it/s, loss=0.689]


Epoch 1 Loss: 29.4607
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:25<00:00,  1.89it/s, loss=0.344]


Epoch 2 Loss: 15.3324
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:25<00:00,  1.91it/s, loss=0.136]


Epoch 3 Loss: 10.9891
Test Metrics: Precision=0.9100, Recall=0.9100, F1=0.9100

Fine-tuning roberta-large (large) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 8793.72 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6991.27 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9641.21 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.497]


Epoch 1 Loss: 25.3580
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s, loss=0.218]


Epoch 2 Loss: 13.2010
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.2]   


Epoch 3 Loss: 8.7132
Test Metrics: Precision=0.9322, Recall=0.9322, F1=0.9322

Fine-tuning bert-base-cased (small) with Train Size 380, Split 6...


Map: 100%|██████████| 380/380 [00:00<00:00, 8664.69 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7585.00 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9302.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.01it/s, loss=0.42] 


Epoch 1 Loss: 30.9841
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:08<00:00,  5.93it/s, loss=0.583]


Epoch 2 Loss: 15.7562
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.04it/s, loss=0.174] 


Epoch 3 Loss: 11.7317
Test Metrics: Precision=0.9048, Recall=0.9048, F1=0.9048

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 6...


Map: 100%|██████████| 380/380 [00:00<00:00, 8886.04 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7504.29 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9240.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:25<00:00,  1.92it/s, loss=0.282]


Epoch 1 Loss: 32.7045
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.01it/s, loss=0.297]


Epoch 2 Loss: 20.3423
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.99it/s, loss=0.13] 


Epoch 3 Loss: 16.0806
Test Metrics: Precision=0.8848, Recall=0.8848, F1=0.8848

Fine-tuning roberta-large (large) with Train Size 380, Split 6...


Map: 100%|██████████| 380/380 [00:00<00:00, 8712.86 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7606.54 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9315.27 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.15it/s, loss=0.255]


Epoch 1 Loss: 25.5303
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.05it/s, loss=0.287]


Epoch 2 Loss: 11.8749
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.13it/s, loss=0.0685]


Epoch 3 Loss: 8.6568
Test Metrics: Precision=0.9262, Recall=0.9262, F1=0.9262

Fine-tuning bert-base-cased (small) with Train Size 380, Split 7...


Map: 100%|██████████| 380/380 [00:00<00:00, 8858.28 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6793.12 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9108.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.06it/s, loss=0.463]


Epoch 1 Loss: 31.4077
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:07<00:00,  6.04it/s, loss=0.0946]


Epoch 2 Loss: 15.2132
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.23it/s, loss=0.198]


Epoch 3 Loss: 11.2159
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 7...


Map: 100%|██████████| 380/380 [00:00<00:00, 8820.00 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6784.88 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9343.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.02it/s, loss=0.316]


Epoch 1 Loss: 26.3295
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  1.99it/s, loss=0.23] 


Epoch 2 Loss: 13.4393
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  2.00it/s, loss=0.225] 


Epoch 3 Loss: 9.0138
Test Metrics: Precision=0.9177, Recall=0.9177, F1=0.9177

Fine-tuning roberta-large (large) with Train Size 380, Split 7...


Map: 100%|██████████| 380/380 [00:00<00:00, 9008.94 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6859.78 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9474.33 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.443]


Epoch 1 Loss: 26.9917
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.245]


Epoch 2 Loss: 13.0012
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.13it/s, loss=0.136] 


Epoch 3 Loss: 8.3521
Test Metrics: Precision=0.9301, Recall=0.9301, F1=0.9301

Fine-tuning bert-base-cased (small) with Train Size 380, Split 8...


Map: 100%|██████████| 380/380 [00:00<00:00, 8680.74 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6892.11 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9069.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:08<00:00,  5.65it/s, loss=0.366]


Epoch 1 Loss: 32.2578
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:08<00:00,  5.74it/s, loss=0.184]


Epoch 2 Loss: 16.2909
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:08<00:00,  5.85it/s, loss=0.128]


Epoch 3 Loss: 12.3398
Test Metrics: Precision=0.9088, Recall=0.9088, F1=0.9088

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 8...


Map: 100%|██████████| 380/380 [00:00<00:00, 8855.77 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7465.62 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9480.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:26<00:00,  1.83it/s, loss=0.475]


Epoch 1 Loss: 29.1246
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:26<00:00,  1.81it/s, loss=0.347]


Epoch 2 Loss: 16.6710
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:25<00:00,  1.88it/s, loss=0.0564]


Epoch 3 Loss: 11.4767
Test Metrics: Precision=0.9144, Recall=0.9144, F1=0.9144

Fine-tuning roberta-large (large) with Train Size 380, Split 8...


Map: 100%|██████████| 380/380 [00:00<00:00, 8692.43 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7070.99 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9346.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.06it/s, loss=0.545]


Epoch 1 Loss: 25.8691
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.02it/s, loss=0.256]


Epoch 2 Loss: 12.2232
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:23<00:00,  2.00it/s, loss=0.153] 


Epoch 3 Loss: 8.5054
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning bert-base-cased (small) with Train Size 380, Split 9...


Map: 100%|██████████| 380/380 [00:00<00:00, 8963.15 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7759.10 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9125.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:07<00:00,  6.04it/s, loss=0.628]


Epoch 1 Loss: 32.7812
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:07<00:00,  6.12it/s, loss=0.12] 


Epoch 2 Loss: 15.4894
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:07<00:00,  6.06it/s, loss=0.179] 


Epoch 3 Loss: 10.9680
Test Metrics: Precision=0.9023, Recall=0.9023, F1=0.9023

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 9...


Map: 100%|██████████| 380/380 [00:00<00:00, 8679.13 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7927.36 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9201.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:24<00:00,  1.97it/s, loss=0.186]


Epoch 1 Loss: 25.5603
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s, loss=0.204]


Epoch 2 Loss: 12.2771
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:24<00:00,  1.97it/s, loss=0.205] 


Epoch 3 Loss: 7.9532
Test Metrics: Precision=0.9155, Recall=0.9155, F1=0.9155

Fine-tuning roberta-large (large) with Train Size 380, Split 9...


Map: 100%|██████████| 380/380 [00:00<00:00, 9052.59 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7557.30 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9376.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:22<00:00,  2.10it/s, loss=0.392]


Epoch 1 Loss: 24.3174
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:22<00:00,  2.11it/s, loss=0.336] 


Epoch 2 Loss: 11.8929
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:22<00:00,  2.10it/s, loss=0.154] 


Epoch 3 Loss: 7.8448
Test Metrics: Precision=0.9296, Recall=0.9296, F1=0.9296

Fine-tuning bert-base-cased (small) with Train Size 380, Split 10...


Map: 100%|██████████| 380/380 [00:00<00:00, 8584.01 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6837.41 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9284.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:08<00:00,  5.93it/s, loss=0.509]


Epoch 1 Loss: 29.7329
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:08<00:00,  5.91it/s, loss=0.519]


Epoch 2 Loss: 15.2957
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:08<00:00,  5.90it/s, loss=0.231]


Epoch 3 Loss: 11.0054
Test Metrics: Precision=0.9072, Recall=0.9072, F1=0.9072

Fine-tuning bert-large-cased (medium) with Train Size 380, Split 10...


Map: 100%|██████████| 380/380 [00:00<00:00, 8852.13 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6994.19 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 3872.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:25<00:00,  1.87it/s, loss=0.486]


Epoch 1 Loss: 27.0672
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:24<00:00,  1.98it/s, loss=0.18] 


Epoch 2 Loss: 12.1640
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:25<00:00,  1.91it/s, loss=0.105] 


Epoch 3 Loss: 7.9359
Test Metrics: Precision=0.9168, Recall=0.9168, F1=0.9168

Fine-tuning roberta-large (large) with Train Size 380, Split 10...


Map: 100%|██████████| 380/380 [00:00<00:00, 8676.72 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7088.28 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9737.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s, loss=0.425]


Epoch 1 Loss: 23.0248
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:23<00:00,  2.07it/s, loss=0.0891]


Epoch 2 Loss: 11.0392
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:23<00:00,  2.04it/s, loss=0.194] 


Epoch 3 Loss: 7.4868
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning bert-base-cased (small) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 9078.92 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7894.42 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9288.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  6.21it/s, loss=0.691]


Epoch 1 Loss: 32.6487
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  6.17it/s, loss=0.264]


Epoch 2 Loss: 16.3544
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, loss=0.285]


Epoch 3 Loss: 11.7854
Test Metrics: Precision=0.9092, Recall=0.9092, F1=0.9092

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 8980.66 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7854.50 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9334.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.228]


Epoch 1 Loss: 30.0314
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.01it/s, loss=0.269]


Epoch 2 Loss: 15.8181
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, loss=0.21] 


Epoch 3 Loss: 11.5714
Test Metrics: Precision=0.9132, Recall=0.9132, F1=0.9132

Fine-tuning roberta-large (large) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 9119.44 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7612.51 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9176.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, loss=0.346]


Epoch 1 Loss: 28.7829
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, loss=0.413]


Epoch 2 Loss: 12.5650
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:22<00:00,  2.20it/s, loss=0.266] 


Epoch 3 Loss: 8.6141
Test Metrics: Precision=0.9295, Recall=0.9295, F1=0.9295

Fine-tuning bert-base-cased (small) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 8887.27 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7847.89 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 8824.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  6.14it/s, loss=0.547]


Epoch 1 Loss: 32.9514
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:07<00:00,  6.39it/s, loss=0.258]


Epoch 2 Loss: 15.8711
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:07<00:00,  6.37it/s, loss=0.163]


Epoch 3 Loss: 11.0820
Test Metrics: Precision=0.9054, Recall=0.9054, F1=0.9054

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 9090.93 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7859.84 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9066.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:25<00:00,  1.97it/s, loss=0.364]


Epoch 1 Loss: 27.5656
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.03it/s, loss=0.301]


Epoch 2 Loss: 14.2613
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.98it/s, loss=0.233] 


Epoch 3 Loss: 10.0073
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning roberta-large (large) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 9173.04 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7887.00 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9132.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.345]


Epoch 1 Loss: 24.7573
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s, loss=0.208]


Epoch 2 Loss: 11.8609
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.10it/s, loss=0.277] 


Epoch 3 Loss: 7.7205
Test Metrics: Precision=0.9264, Recall=0.9264, F1=0.9264

Fine-tuning bert-base-cased (small) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8933.84 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7320.70 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9091.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s, loss=0.269]


Epoch 1 Loss: 34.3303
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  5.97it/s, loss=0.208]


Epoch 2 Loss: 16.5407
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  6.05it/s, loss=0.181] 


Epoch 3 Loss: 12.0318
Test Metrics: Precision=0.9018, Recall=0.9018, F1=0.9018

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8653.22 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7040.52 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9261.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:25<00:00,  1.95it/s, loss=0.376]


Epoch 1 Loss: 26.8637
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:25<00:00,  1.94it/s, loss=0.133]


Epoch 2 Loss: 13.2793
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.97it/s, loss=0.247] 


Epoch 3 Loss: 8.8185
Test Metrics: Precision=0.9138, Recall=0.9138, F1=0.9138

Fine-tuning roberta-large (large) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8556.40 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6906.19 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9359.48 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.258]


Epoch 1 Loss: 25.8084
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, loss=0.139]


Epoch 2 Loss: 12.2140
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.06it/s, loss=0.186] 


Epoch 3 Loss: 7.9173
Test Metrics: Precision=0.9297, Recall=0.9297, F1=0.9297

Fine-tuning bert-base-cased (small) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 9164.82 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7032.41 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9135.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, loss=0.368]


Epoch 1 Loss: 32.5113
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  6.07it/s, loss=0.428]


Epoch 2 Loss: 16.3435
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  6.09it/s, loss=0.226] 


Epoch 3 Loss: 11.4494
Test Metrics: Precision=0.9051, Recall=0.9051, F1=0.9051

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 9234.33 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7265.54 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9407.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.379]


Epoch 1 Loss: 28.6054
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, loss=0.319]


Epoch 2 Loss: 13.4896
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.99it/s, loss=0.211]


Epoch 3 Loss: 9.2676
Test Metrics: Precision=0.9160, Recall=0.9160, F1=0.9160

Fine-tuning roberta-large (large) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 9331.46 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6977.71 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9221.23 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.09it/s, loss=0.428]


Epoch 1 Loss: 24.6294
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.279] 


Epoch 2 Loss: 11.6950
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, loss=0.167] 


Epoch 3 Loss: 7.4802
Test Metrics: Precision=0.9324, Recall=0.9324, F1=0.9324

Fine-tuning bert-base-cased (small) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 8493.46 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6984.98 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9078.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  5.89it/s, loss=0.348]


Epoch 1 Loss: 33.6702
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  5.83it/s, loss=0.195]


Epoch 2 Loss: 17.5238
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  5.86it/s, loss=0.238] 


Epoch 3 Loss: 12.1292
Test Metrics: Precision=0.9088, Recall=0.9088, F1=0.9088

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 8959.60 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7473.81 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9345.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:26<00:00,  1.88it/s, loss=0.735]


Epoch 1 Loss: 27.6900
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:26<00:00,  1.88it/s, loss=0.282]


Epoch 2 Loss: 13.2833
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:26<00:00,  1.87it/s, loss=0.133] 


Epoch 3 Loss: 8.7051
Test Metrics: Precision=0.9203, Recall=0.9203, F1=0.9203

Fine-tuning roberta-large (large) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 9088.32 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6991.96 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 8772.02 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:25<00:00,  1.98it/s, loss=0.292]


Epoch 1 Loss: 25.8010
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.344]


Epoch 2 Loss: 12.1871
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.97it/s, loss=0.0934]


Epoch 3 Loss: 8.0733
Test Metrics: Precision=0.9348, Recall=0.9348, F1=0.9348

Fine-tuning bert-base-cased (small) with Train Size 400, Split 6...


Map: 100%|██████████| 400/400 [00:00<00:00, 8874.49 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7866.10 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9232.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  5.88it/s, loss=0.364]


Epoch 1 Loss: 32.8686
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s, loss=0.295]


Epoch 2 Loss: 16.9287
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s, loss=0.251]


Epoch 3 Loss: 12.6833
Test Metrics: Precision=0.9059, Recall=0.9059, F1=0.9059

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 6...


Map: 100%|██████████| 400/400 [00:00<00:00, 8872.42 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7775.15 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9234.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, loss=0.365]


Epoch 1 Loss: 29.9157
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:26<00:00,  1.91it/s, loss=0.409]


Epoch 2 Loss: 16.8888
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.95it/s, loss=0.322]


Epoch 3 Loss: 12.2826
Test Metrics: Precision=0.9098, Recall=0.9098, F1=0.9098

Fine-tuning roberta-large (large) with Train Size 400, Split 6...


Map: 100%|██████████| 400/400 [00:00<00:00, 8945.08 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7734.29 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9328.07 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.05it/s, loss=0.297]


Epoch 1 Loss: 26.0496
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.14it/s, loss=0.218]


Epoch 2 Loss: 13.1310
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.05it/s, loss=0.12]  


Epoch 3 Loss: 9.2464
Test Metrics: Precision=0.9289, Recall=0.9289, F1=0.9289

Fine-tuning bert-base-cased (small) with Train Size 400, Split 7...


Map: 100%|██████████| 400/400 [00:00<00:00, 8695.38 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7897.39 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9268.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  5.90it/s, loss=0.355]


Epoch 1 Loss: 29.7552
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  5.91it/s, loss=0.223]


Epoch 2 Loss: 15.1844
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  6.02it/s, loss=0.228]


Epoch 3 Loss: 10.7916
Test Metrics: Precision=0.9086, Recall=0.9086, F1=0.9086

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 7...


Map: 100%|██████████| 400/400 [00:00<00:00, 8782.87 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7859.84 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9214.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:25<00:00,  1.94it/s, loss=0.286]


Epoch 1 Loss: 28.9324
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, loss=0.296]


Epoch 2 Loss: 14.7128
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.94it/s, loss=0.153] 


Epoch 3 Loss: 10.3022
Test Metrics: Precision=0.9165, Recall=0.9165, F1=0.9165

Fine-tuning roberta-large (large) with Train Size 400, Split 7...


Map: 100%|██████████| 400/400 [00:00<00:00, 9073.82 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7508.77 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9505.65 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.241]


Epoch 1 Loss: 25.7014
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, loss=0.344]


Epoch 2 Loss: 11.9191
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.01it/s, loss=0.173] 


Epoch 3 Loss: 8.3230
Test Metrics: Precision=0.9311, Recall=0.9311, F1=0.9311

Fine-tuning bert-base-cased (small) with Train Size 400, Split 8...


Map: 100%|██████████| 400/400 [00:00<00:00, 8593.74 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7241.86 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9457.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  5.65it/s, loss=0.335]


Epoch 1 Loss: 36.5363
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  5.67it/s, loss=0.273]


Epoch 2 Loss: 17.8586
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  5.57it/s, loss=0.302]


Epoch 3 Loss: 13.4386
Test Metrics: Precision=0.9059, Recall=0.9059, F1=0.9059

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 8...


Map: 100%|██████████| 400/400 [00:00<00:00, 8659.88 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7130.90 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9509.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:27<00:00,  1.83it/s, loss=0.328]


Epoch 1 Loss: 28.1718
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:27<00:00,  1.85it/s, loss=0.421]


Epoch 2 Loss: 14.0657
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:27<00:00,  1.85it/s, loss=0.331] 


Epoch 3 Loss: 9.8827
Test Metrics: Precision=0.9160, Recall=0.9160, F1=0.9160

Fine-tuning roberta-large (large) with Train Size 400, Split 8...


Map: 100%|██████████| 400/400 [00:00<00:00, 8651.22 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6870.56 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9430.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:24<00:00,  2.05it/s, loss=0.482]


Epoch 1 Loss: 25.9353
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:25<00:00,  2.00it/s, loss=0.128]


Epoch 2 Loss: 11.8449
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, loss=0.136] 


Epoch 3 Loss: 8.2058
Test Metrics: Precision=0.9317, Recall=0.9317, F1=0.9317

Fine-tuning bert-base-cased (small) with Train Size 400, Split 9...


Map: 100%|██████████| 400/400 [00:00<00:00, 8762.00 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7019.90 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9221.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  6.12it/s, loss=0.328]


Epoch 1 Loss: 32.4823
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  6.00it/s, loss=0.233]


Epoch 2 Loss: 16.1600
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  6.11it/s, loss=0.338]


Epoch 3 Loss: 11.4446
Test Metrics: Precision=0.9055, Recall=0.9055, F1=0.9055

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 9...


Map: 100%|██████████| 400/400 [00:00<00:00, 8866.56 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7043.63 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9047.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:25<00:00,  1.95it/s, loss=0.347]


Epoch 1 Loss: 27.2925
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:25<00:00,  1.93it/s, loss=0.353] 


Epoch 2 Loss: 12.5301
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.98it/s, loss=0.143] 


Epoch 3 Loss: 7.9153
Test Metrics: Precision=0.9167, Recall=0.9167, F1=0.9167

Fine-tuning roberta-large (large) with Train Size 400, Split 9...


Map: 100%|██████████| 400/400 [00:00<00:00, 8910.16 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7517.35 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9319.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.11it/s, loss=0.209]


Epoch 1 Loss: 26.3993
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.12it/s, loss=0.243] 


Epoch 2 Loss: 11.4614
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:23<00:00,  2.10it/s, loss=0.105] 


Epoch 3 Loss: 7.6254
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning bert-base-cased (small) with Train Size 400, Split 10...


Map: 100%|██████████| 400/400 [00:00<00:00, 8544.85 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7660.83 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9174.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:08<00:00,  5.90it/s, loss=0.369]


Epoch 1 Loss: 33.4717
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s, loss=0.264]


Epoch 2 Loss: 15.9304
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s, loss=0.166]


Epoch 3 Loss: 11.4369
Test Metrics: Precision=0.9047, Recall=0.9047, F1=0.9047

Fine-tuning bert-large-cased (medium) with Train Size 400, Split 10...


Map: 100%|██████████| 400/400 [00:00<00:00, 8611.43 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7207.33 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9307.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:26<00:00,  1.86it/s, loss=0.313]


Epoch 1 Loss: 27.3445
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:26<00:00,  1.90it/s, loss=0.185] 


Epoch 2 Loss: 12.5016
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:25<00:00,  1.93it/s, loss=0.118] 


Epoch 3 Loss: 8.1555
Test Metrics: Precision=0.9175, Recall=0.9175, F1=0.9175

Fine-tuning roberta-large (large) with Train Size 400, Split 10...


Map: 100%|██████████| 400/400 [00:00<00:00, 8720.47 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7782.18 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9342.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:23<00:00,  2.09it/s, loss=0.326]


Epoch 1 Loss: 23.5910
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:23<00:00,  2.08it/s, loss=0.178] 


Epoch 2 Loss: 11.8364
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, loss=0.159] 


Epoch 3 Loss: 8.0030
Test Metrics: Precision=0.9327, Recall=0.9327, F1=0.9327

Fine-tuning bert-base-cased (small) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 9051.15 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8104.94 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9075.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.34it/s, loss=0.541]


Epoch 1 Loss: 33.2677
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:08<00:00,  6.19it/s, loss=0.272]


Epoch 2 Loss: 16.9960
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.18it/s, loss=0.0646]


Epoch 3 Loss: 11.8344
Test Metrics: Precision=0.9098, Recall=0.9098, F1=0.9098

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 8802.05 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8010.04 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9149.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  2.03it/s, loss=0.327]


Epoch 1 Loss: 29.5942
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  2.00it/s, loss=0.198]


Epoch 2 Loss: 15.2260
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.06it/s, loss=0.176]


Epoch 3 Loss: 10.9145
Test Metrics: Precision=0.9160, Recall=0.9160, F1=0.9160

Fine-tuning roberta-large (large) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 9079.24 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8240.86 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9287.17 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.20it/s, loss=0.288]


Epoch 1 Loss: 28.5684
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:23<00:00,  2.25it/s, loss=0.211]


Epoch 2 Loss: 13.5234
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:23<00:00,  2.28it/s, loss=0.105] 


Epoch 3 Loss: 9.2715
Test Metrics: Precision=0.9279, Recall=0.9279, F1=0.9279

Fine-tuning bert-base-cased (small) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 9359.25 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7817.90 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 8892.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.36it/s, loss=0.304]


Epoch 1 Loss: 32.9421
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:08<00:00,  6.49it/s, loss=0.184]


Epoch 2 Loss: 16.5732
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.04it/s, loss=0.261] 


Epoch 3 Loss: 11.8723
Test Metrics: Precision=0.9070, Recall=0.9070, F1=0.9070

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 8792.56 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7719.58 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 8838.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:25<00:00,  2.05it/s, loss=0.254]


Epoch 1 Loss: 28.9730
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  2.01it/s, loss=0.136]


Epoch 2 Loss: 13.5536
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  2.00it/s, loss=0.292] 


Epoch 3 Loss: 8.8606
Test Metrics: Precision=0.9162, Recall=0.9162, F1=0.9162

Fine-tuning roberta-large (large) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 9097.71 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7770.31 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 8983.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.19it/s, loss=0.293]


Epoch 1 Loss: 26.1478
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.14it/s, loss=0.178]


Epoch 2 Loss: 12.1304
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:23<00:00,  2.24it/s, loss=0.102] 


Epoch 3 Loss: 8.4469
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning bert-base-cased (small) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8678.34 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7257.63 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9269.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.09it/s, loss=0.342]


Epoch 1 Loss: 33.2286
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:08<00:00,  5.97it/s, loss=0.295]


Epoch 2 Loss: 16.6080
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.22it/s, loss=0.105]


Epoch 3 Loss: 11.8823
Test Metrics: Precision=0.9044, Recall=0.9044, F1=0.9044

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8753.24 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7268.56 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 3448.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:27<00:00,  1.96it/s, loss=0.389]


Epoch 1 Loss: 29.7286
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:27<00:00,  1.93it/s, loss=0.136]


Epoch 2 Loss: 15.2716
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:27<00:00,  1.94it/s, loss=0.222] 


Epoch 3 Loss: 10.2574
Test Metrics: Precision=0.9129, Recall=0.9129, F1=0.9129

Fine-tuning roberta-large (large) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8967.30 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7406.85 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9442.09 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:25<00:00,  2.08it/s, loss=0.236]


Epoch 1 Loss: 24.7929
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.12it/s, loss=0.288]


Epoch 2 Loss: 12.2687
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:24<00:00,  2.15it/s, loss=0.0917]


Epoch 3 Loss: 8.2828
Test Metrics: Precision=0.9294, Recall=0.9294, F1=0.9294

Fine-tuning bert-base-cased (small) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 8897.14 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7746.05 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9107.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.13it/s, loss=0.424]


Epoch 1 Loss: 33.0174
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:08<00:00,  6.17it/s, loss=0.286]


Epoch 2 Loss: 16.0329
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.05it/s, loss=0.41] 


Epoch 3 Loss: 11.7067
Test Metrics: Precision=0.9061, Recall=0.9061, F1=0.9061

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 8643.47 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7570.95 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9182.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  2.01it/s, loss=0.258]


Epoch 1 Loss: 30.2183
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.07it/s, loss=0.148]


Epoch 2 Loss: 14.9959
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.05it/s, loss=0.144]


Epoch 3 Loss: 10.0701
Test Metrics: Precision=0.9140, Recall=0.9140, F1=0.9140

Fine-tuning roberta-large (large) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 8882.83 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7294.89 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9071.19 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.21it/s, loss=0.354]


Epoch 1 Loss: 26.6198
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.14it/s, loss=0.244] 


Epoch 2 Loss: 11.8412
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:24<00:00,  2.13it/s, loss=0.183] 


Epoch 3 Loss: 8.0771
Test Metrics: Precision=0.9292, Recall=0.9292, F1=0.9292

Fine-tuning bert-base-cased (small) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 8845.81 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7295.50 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9064.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:09<00:00,  5.74it/s, loss=0.252]


Epoch 1 Loss: 35.5645
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:09<00:00,  5.88it/s, loss=0.245]


Epoch 2 Loss: 19.0379
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:09<00:00,  5.86it/s, loss=0.13] 


Epoch 3 Loss: 13.3020
Test Metrics: Precision=0.9103, Recall=0.9103, F1=0.9103

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 8935.05 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7808.89 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9384.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:28<00:00,  1.84it/s, loss=0.288]


Epoch 1 Loss: 30.9128
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:27<00:00,  1.94it/s, loss=0.227]


Epoch 2 Loss: 15.8282
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:28<00:00,  1.83it/s, loss=0.129]


Epoch 3 Loss: 11.5537
Test Metrics: Precision=0.9165, Recall=0.9165, F1=0.9165

Fine-tuning roberta-large (large) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 9066.85 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7649.52 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9251.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  2.00it/s, loss=0.345]


Epoch 1 Loss: 26.6790
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  1.98it/s, loss=0.356]


Epoch 2 Loss: 13.1333
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  2.01it/s, loss=0.23]  


Epoch 3 Loss: 8.4252
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning bert-base-cased (small) with Train Size 420, Split 6...


Map: 100%|██████████| 420/420 [00:00<00:00, 8678.51 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8080.58 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 8889.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.10it/s, loss=0.572]


Epoch 1 Loss: 33.6340
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:09<00:00,  5.86it/s, loss=0.253]


Epoch 2 Loss: 16.8031
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.18it/s, loss=0.281]


Epoch 3 Loss: 12.3363
Test Metrics: Precision=0.9076, Recall=0.9076, F1=0.9076

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 6...


Map: 100%|██████████| 420/420 [00:00<00:00, 8520.97 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8412.64 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9204.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  1.97it/s, loss=0.119]


Epoch 1 Loss: 27.4299
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  1.99it/s, loss=0.327]


Epoch 2 Loss: 13.7798
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  1.98it/s, loss=0.144] 


Epoch 3 Loss: 9.0214
Test Metrics: Precision=0.9154, Recall=0.9154, F1=0.9154

Fine-tuning roberta-large (large) with Train Size 420, Split 6...


Map: 100%|██████████| 420/420 [00:00<00:00, 8939.58 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8342.33 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9279.32 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:25<00:00,  2.05it/s, loss=0.556]


Epoch 1 Loss: 28.0295
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:24<00:00,  2.16it/s, loss=0.275]


Epoch 2 Loss: 14.0692
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.07it/s, loss=0.128]


Epoch 3 Loss: 10.2942
Test Metrics: Precision=0.9276, Recall=0.9276, F1=0.9276

Fine-tuning bert-base-cased (small) with Train Size 420, Split 7...


Map: 100%|██████████| 420/420 [00:00<00:00, 2392.80 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7868.01 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9196.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.15it/s, loss=0.367]


Epoch 1 Loss: 33.1153
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:08<00:00,  6.14it/s, loss=0.292]


Epoch 2 Loss: 15.6884
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.13it/s, loss=0.332]


Epoch 3 Loss: 11.6750
Test Metrics: Precision=0.9098, Recall=0.9098, F1=0.9098

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 7...


Map: 100%|██████████| 420/420 [00:00<00:00, 8697.92 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7364.12 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9275.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  1.97it/s, loss=0.726]


Epoch 1 Loss: 29.2423
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.05it/s, loss=0.36] 


Epoch 2 Loss: 14.1284
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  1.99it/s, loss=0.155] 


Epoch 3 Loss: 8.9735
Test Metrics: Precision=0.9194, Recall=0.9194, F1=0.9194

Fine-tuning roberta-large (large) with Train Size 420, Split 7...


Map: 100%|██████████| 420/420 [00:00<00:00, 8906.95 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7842.79 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9404.98 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:25<00:00,  2.09it/s, loss=0.448]


Epoch 1 Loss: 25.0203
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.08it/s, loss=0.247] 


Epoch 2 Loss: 11.4571
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.07it/s, loss=0.122] 


Epoch 3 Loss: 7.6382
Test Metrics: Precision=0.9337, Recall=0.9337, F1=0.9337

Fine-tuning bert-base-cased (small) with Train Size 420, Split 8...


Map: 100%|██████████| 420/420 [00:00<00:00, 8680.78 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7120.77 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9532.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:09<00:00,  5.69it/s, loss=0.547]


Epoch 1 Loss: 34.4745
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:09<00:00,  5.66it/s, loss=0.242]


Epoch 2 Loss: 17.5106
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:09<00:00,  5.67it/s, loss=0.431]


Epoch 3 Loss: 13.8047
Test Metrics: Precision=0.9094, Recall=0.9094, F1=0.9094

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 8...


Map: 100%|██████████| 420/420 [00:00<00:00, 8526.45 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7385.58 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 3485.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:29<00:00,  1.82it/s, loss=0.374]


Epoch 1 Loss: 30.3351
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:28<00:00,  1.85it/s, loss=0.165]


Epoch 2 Loss: 15.4564
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:28<00:00,  1.84it/s, loss=0.198] 


Epoch 3 Loss: 10.7205
Test Metrics: Precision=0.9159, Recall=0.9159, F1=0.9159

Fine-tuning roberta-large (large) with Train Size 420, Split 8...


Map: 100%|██████████| 420/420 [00:00<00:00, 8593.29 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 6974.32 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9423.81 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  1.99it/s, loss=0.27] 


Epoch 1 Loss: 26.7002
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  1.99it/s, loss=0.0741]


Epoch 2 Loss: 13.9815
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:26<00:00,  1.98it/s, loss=0.193] 


Epoch 3 Loss: 9.2056
Test Metrics: Precision=0.9287, Recall=0.9287, F1=0.9287

Fine-tuning bert-base-cased (small) with Train Size 420, Split 9...


Map: 100%|██████████| 420/420 [00:00<00:00, 8751.24 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7249.27 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9283.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.25it/s, loss=0.261]


Epoch 1 Loss: 32.0923
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:08<00:00,  6.07it/s, loss=0.201]


Epoch 2 Loss: 15.3451
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.06it/s, loss=0.187] 


Epoch 3 Loss: 11.1154
Test Metrics: Precision=0.9014, Recall=0.9014, F1=0.9014

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 9...


Map: 100%|██████████| 420/420 [00:00<00:00, 8746.98 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 6918.85 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9237.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:27<00:00,  1.96it/s, loss=0.344]


Epoch 1 Loss: 30.1715
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:26<00:00,  1.98it/s, loss=0.365]


Epoch 2 Loss: 14.2287
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:27<00:00,  1.95it/s, loss=0.0903]


Epoch 3 Loss: 9.4293
Test Metrics: Precision=0.9150, Recall=0.9150, F1=0.9150

Fine-tuning roberta-large (large) with Train Size 420, Split 9...


Map: 100%|██████████| 420/420 [00:00<00:00, 8708.33 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 6833.10 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9268.35 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:24<00:00,  2.13it/s, loss=0.441]


Epoch 1 Loss: 31.0537
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.11it/s, loss=0.283] 


Epoch 2 Loss: 13.9914
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.10it/s, loss=0.0764]


Epoch 3 Loss: 9.6988
Test Metrics: Precision=0.9252, Recall=0.9252, F1=0.9252

Fine-tuning bert-base-cased (small) with Train Size 420, Split 10...


Map: 100%|██████████| 420/420 [00:00<00:00, 8435.40 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 6810.12 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9203.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:08<00:00,  6.10it/s, loss=0.368]


Epoch 1 Loss: 32.9137
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:09<00:00,  5.82it/s, loss=0.333]


Epoch 2 Loss: 17.2271
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:08<00:00,  6.18it/s, loss=0.26]  


Epoch 3 Loss: 12.4140
Test Metrics: Precision=0.9053, Recall=0.9053, F1=0.9053

Fine-tuning bert-large-cased (medium) with Train Size 420, Split 10...


Map: 100%|██████████| 420/420 [00:00<00:00, 8677.70 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7244.20 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9283.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:26<00:00,  1.98it/s, loss=0.576]


Epoch 1 Loss: 28.8770
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:27<00:00,  1.92it/s, loss=0.156]


Epoch 2 Loss: 13.5208
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:27<00:00,  1.92it/s, loss=0.0616]


Epoch 3 Loss: 9.2328
Test Metrics: Precision=0.9188, Recall=0.9188, F1=0.9188

Fine-tuning roberta-large (large) with Train Size 420, Split 10...


Map: 100%|██████████| 420/420 [00:00<00:00, 8615.69 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7263.16 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9659.36 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:25<00:00,  2.07it/s, loss=0.233]


Epoch 1 Loss: 28.5773
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:25<00:00,  2.06it/s, loss=0.242]


Epoch 2 Loss: 13.5443
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:25<00:00,  2.04it/s, loss=0.2]   


Epoch 3 Loss: 9.7203
Test Metrics: Precision=0.9311, Recall=0.9311, F1=0.9311

Fine-tuning bert-base-cased (small) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 8862.64 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8082.57 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9055.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:08<00:00,  6.39it/s, loss=0.4]  


Epoch 1 Loss: 32.7265
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:08<00:00,  6.14it/s, loss=0.31] 


Epoch 2 Loss: 16.8618
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:08<00:00,  6.33it/s, loss=0.17]  


Epoch 3 Loss: 11.9467
Test Metrics: Precision=0.9105, Recall=0.9105, F1=0.9105

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 8840.52 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8147.88 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 8921.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  2.01it/s, loss=0.435]


Epoch 1 Loss: 32.0716
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.238]


Epoch 2 Loss: 17.4472
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.261] 


Epoch 3 Loss: 12.6972
Test Metrics: Precision=0.9108, Recall=0.9108, F1=0.9108

Fine-tuning roberta-large (large) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 9081.26 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8061.92 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9205.41 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.18it/s, loss=0.316]


Epoch 1 Loss: 28.0194
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:24<00:00,  2.21it/s, loss=0.203]


Epoch 2 Loss: 13.2859
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.14it/s, loss=0.135] 


Epoch 3 Loss: 9.2293
Test Metrics: Precision=0.9312, Recall=0.9312, F1=0.9312

Fine-tuning bert-base-cased (small) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 2408.77 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7458.35 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9231.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  5.96it/s, loss=0.549]


Epoch 1 Loss: 34.2124
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  6.06it/s, loss=0.171]


Epoch 2 Loss: 17.2835
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  6.11it/s, loss=0.333] 


Epoch 3 Loss: 12.1629
Test Metrics: Precision=0.9061, Recall=0.9061, F1=0.9061

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 8638.98 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7478.14 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9021.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  2.03it/s, loss=0.419]


Epoch 1 Loss: 32.8864
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.96it/s, loss=0.226]


Epoch 2 Loss: 17.2344
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.06it/s, loss=0.288]


Epoch 3 Loss: 12.4815
Test Metrics: Precision=0.9098, Recall=0.9098, F1=0.9098

Fine-tuning roberta-large (large) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 8757.46 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7779.84 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9219.60 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.14it/s, loss=0.309]


Epoch 1 Loss: 28.7049
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.11] 


Epoch 2 Loss: 13.1144
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.124] 


Epoch 3 Loss: 9.3447
Test Metrics: Precision=0.9310, Recall=0.9310, F1=0.9310

Fine-tuning bert-base-cased (small) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8700.77 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7478.60 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9317.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:08<00:00,  6.12it/s, loss=0.409]


Epoch 1 Loss: 33.9913
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  5.81it/s, loss=0.271]


Epoch 2 Loss: 17.4616
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.94it/s, loss=0.204] 


Epoch 3 Loss: 12.4990
Test Metrics: Precision=0.9059, Recall=0.9059, F1=0.9059

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8848.36 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7023.23 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9157.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:29<00:00,  1.89it/s, loss=0.299]


Epoch 1 Loss: 29.0276
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.92it/s, loss=0.387]


Epoch 2 Loss: 15.9890
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:28<00:00,  1.94it/s, loss=0.102] 


Epoch 3 Loss: 11.9977
Test Metrics: Precision=0.9091, Recall=0.9091, F1=0.9091

Fine-tuning roberta-large (large) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8689.67 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7351.39 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9504.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.09it/s, loss=0.312]


Epoch 1 Loss: 27.7846
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.16it/s, loss=0.322]


Epoch 2 Loss: 13.9716
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.10it/s, loss=0.0786]


Epoch 3 Loss: 10.4614
Test Metrics: Precision=0.9253, Recall=0.9253, F1=0.9253

Fine-tuning bert-base-cased (small) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 8849.72 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7052.35 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9370.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  6.08it/s, loss=0.295]


Epoch 1 Loss: 33.6160
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  5.93it/s, loss=0.238]


Epoch 2 Loss: 16.6365
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.98it/s, loss=0.149]


Epoch 3 Loss: 11.9623
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 8436.12 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7075.87 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9083.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  2.00it/s, loss=0.307]


Epoch 1 Loss: 29.5437
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.94it/s, loss=0.171]


Epoch 2 Loss: 14.5182
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.204] 


Epoch 3 Loss: 9.4860
Test Metrics: Precision=0.9168, Recall=0.9168, F1=0.9168

Fine-tuning roberta-large (large) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 8602.42 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7238.37 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9285.57 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:25<00:00,  2.12it/s, loss=0.232]


Epoch 1 Loss: 27.8701
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.06it/s, loss=0.19] 


Epoch 2 Loss: 12.5197
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.16it/s, loss=0.171] 


Epoch 3 Loss: 8.3890
Test Metrics: Precision=0.9307, Recall=0.9307, F1=0.9307

Fine-tuning bert-base-cased (small) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 8649.75 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8149.67 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 8993.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  5.89it/s, loss=0.448]


Epoch 1 Loss: 36.4720
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  5.76it/s, loss=0.35] 


Epoch 2 Loss: 17.6007
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.77it/s, loss=0.129]


Epoch 3 Loss: 12.6475
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 8561.51 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8047.33 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9152.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:29<00:00,  1.87it/s, loss=0.49] 


Epoch 1 Loss: 33.1323
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.90it/s, loss=0.428]


Epoch 2 Loss: 17.7811
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:29<00:00,  1.89it/s, loss=0.322]


Epoch 3 Loss: 13.3493
Test Metrics: Precision=0.9139, Recall=0.9139, F1=0.9139

Fine-tuning roberta-large (large) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 8848.11 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7958.66 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9243.36 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  1.97it/s, loss=0.339]


Epoch 1 Loss: 29.2800
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.121]


Epoch 2 Loss: 13.5002
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:28<00:00,  1.96it/s, loss=0.128] 


Epoch 3 Loss: 9.3600
Test Metrics: Precision=0.9342, Recall=0.9342, F1=0.9342

Fine-tuning bert-base-cased (small) with Train Size 440, Split 6...


Map: 100%|██████████| 440/440 [00:00<00:00, 8797.87 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7493.63 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9183.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  5.89it/s, loss=0.446]


Epoch 1 Loss: 33.1664
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  6.08it/s, loss=0.393]


Epoch 2 Loss: 17.7344
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.91it/s, loss=0.61]  


Epoch 3 Loss: 12.8532
Test Metrics: Precision=0.9050, Recall=0.9050, F1=0.9050

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 6...


Map: 100%|██████████| 440/440 [00:00<00:00, 8715.52 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7231.42 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9098.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:28<00:00,  1.95it/s, loss=0.357]


Epoch 1 Loss: 30.3670
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.90it/s, loss=0.21] 


Epoch 2 Loss: 14.0377
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  2.01it/s, loss=0.13]  


Epoch 3 Loss: 9.0791
Test Metrics: Precision=0.9151, Recall=0.9151, F1=0.9151

Fine-tuning roberta-large (large) with Train Size 440, Split 6...


Map: 100%|██████████| 440/440 [00:00<00:00, 8991.14 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7651.46 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9313.83 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.05it/s, loss=0.238]


Epoch 1 Loss: 27.3573
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.332]


Epoch 2 Loss: 13.4138
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:25<00:00,  2.15it/s, loss=0.148] 


Epoch 3 Loss: 9.6969
Test Metrics: Precision=0.9288, Recall=0.9288, F1=0.9288

Fine-tuning bert-base-cased (small) with Train Size 440, Split 7...


Map: 100%|██████████| 440/440 [00:00<00:00, 8695.73 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8048.56 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9106.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  5.88it/s, loss=0.299]


Epoch 1 Loss: 35.1405
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  5.82it/s, loss=0.408]


Epoch 2 Loss: 18.1777
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.93it/s, loss=0.207]


Epoch 3 Loss: 12.9989
Test Metrics: Precision=0.9108, Recall=0.9108, F1=0.9108

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 7...


Map: 100%|██████████| 440/440 [00:00<00:00, 8572.37 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8260.57 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9028.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:27<00:00,  1.98it/s, loss=0.408]


Epoch 1 Loss: 32.4384
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:29<00:00,  1.88it/s, loss=0.295]


Epoch 2 Loss: 16.5105
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:28<00:00,  1.91it/s, loss=0.147]


Epoch 3 Loss: 12.1932
Test Metrics: Precision=0.9175, Recall=0.9175, F1=0.9175

Fine-tuning roberta-large (large) with Train Size 440, Split 7...


Map: 100%|██████████| 440/440 [00:00<00:00, 8658.36 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7747.34 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9162.98 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.05it/s, loss=0.274]


Epoch 1 Loss: 27.3465
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:26<00:00,  2.10it/s, loss=0.21] 


Epoch 2 Loss: 13.0713
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.128] 


Epoch 3 Loss: 9.3616
Test Metrics: Precision=0.9323, Recall=0.9323, F1=0.9323

Fine-tuning bert-base-cased (small) with Train Size 440, Split 8...


Map: 100%|██████████| 440/440 [00:00<00:00, 8673.49 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 6829.60 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9146.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  5.80it/s, loss=0.502]


Epoch 1 Loss: 37.8821
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  5.64it/s, loss=0.265]


Epoch 2 Loss: 17.9105
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.69it/s, loss=0.304]


Epoch 3 Loss: 13.1776
Test Metrics: Precision=0.9095, Recall=0.9095, F1=0.9095

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 8...


Map: 100%|██████████| 440/440 [00:00<00:00, 8741.61 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7103.38 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9278.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:29<00:00,  1.85it/s, loss=0.333]


Epoch 1 Loss: 31.5727
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.91it/s, loss=0.349]


Epoch 2 Loss: 15.7985
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:30<00:00,  1.83it/s, loss=0.173] 


Epoch 3 Loss: 11.0042
Test Metrics: Precision=0.9209, Recall=0.9209, F1=0.9209

Fine-tuning roberta-large (large) with Train Size 440, Split 8...


Map: 100%|██████████| 440/440 [00:00<00:00, 8823.32 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 6958.74 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9523.82 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:28<00:00,  1.90it/s, loss=0.247]


Epoch 1 Loss: 27.4743
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  1.99it/s, loss=0.298] 


Epoch 2 Loss: 12.6151
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:27<00:00,  2.00it/s, loss=0.123] 


Epoch 3 Loss: 8.3390
Test Metrics: Precision=0.9354, Recall=0.9354, F1=0.9354

Fine-tuning bert-base-cased (small) with Train Size 440, Split 9...


Map: 100%|██████████| 440/440 [00:00<00:00, 8790.28 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7995.03 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9008.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:08<00:00,  6.20it/s, loss=0.309]


Epoch 1 Loss: 34.1775
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:08<00:00,  6.19it/s, loss=0.202]


Epoch 2 Loss: 15.9322
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.93it/s, loss=0.279] 


Epoch 3 Loss: 11.8265
Test Metrics: Precision=0.9080, Recall=0.9080, F1=0.9080

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 9...


Map: 100%|██████████| 440/440 [00:00<00:00, 8768.94 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7938.46 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9310.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:28<00:00,  1.93it/s, loss=0.389]


Epoch 1 Loss: 27.2731
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:27<00:00,  2.02it/s, loss=0.2]   


Epoch 2 Loss: 12.6767
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.04it/s, loss=0.0634]


Epoch 3 Loss: 8.0231
Test Metrics: Precision=0.9179, Recall=0.9179, F1=0.9179

Fine-tuning roberta-large (large) with Train Size 440, Split 9...


Map: 100%|██████████| 440/440 [00:00<00:00, 8868.00 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8070.91 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9554.56 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.11it/s, loss=0.405]


Epoch 1 Loss: 27.3708
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.18it/s, loss=0.162]


Epoch 2 Loss: 12.5425
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.10it/s, loss=0.169] 


Epoch 3 Loss: 8.3965
Test Metrics: Precision=0.9316, Recall=0.9316, F1=0.9316

Fine-tuning bert-base-cased (small) with Train Size 440, Split 10...


Map: 100%|██████████| 440/440 [00:00<00:00, 8687.54 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7966.05 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9113.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:09<00:00,  5.90it/s, loss=0.271]


Epoch 1 Loss: 32.9913
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:09<00:00,  5.81it/s, loss=0.344]


Epoch 2 Loss: 15.9377
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:09<00:00,  5.86it/s, loss=0.2]   


Epoch 3 Loss: 11.6826
Test Metrics: Precision=0.9063, Recall=0.9063, F1=0.9063

Fine-tuning bert-large-cased (medium) with Train Size 440, Split 10...


Map: 100%|██████████| 440/440 [00:00<00:00, 8885.47 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7982.58 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9253.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:28<00:00,  1.96it/s, loss=0.416]


Epoch 1 Loss: 29.6977
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:28<00:00,  1.91it/s, loss=0.199]


Epoch 2 Loss: 14.2687
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:28<00:00,  1.94it/s, loss=0.187] 


Epoch 3 Loss: 9.9977
Test Metrics: Precision=0.9173, Recall=0.9173, F1=0.9173

Fine-tuning roberta-large (large) with Train Size 440, Split 10...


Map: 100%|██████████| 440/440 [00:00<00:00, 8635.91 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7991.92 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9242.06 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:26<00:00,  2.06it/s, loss=0.233]


Epoch 1 Loss: 26.7349
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:25<00:00,  2.13it/s, loss=0.221] 


Epoch 2 Loss: 12.8288
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:26<00:00,  2.08it/s, loss=0.198] 


Epoch 3 Loss: 8.8236
Test Metrics: Precision=0.9323, Recall=0.9323, F1=0.9323

Fine-tuning bert-base-cased (small) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 8716.82 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7086.27 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9318.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  6.29it/s, loss=0.121]


Epoch 1 Loss: 36.0847
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:08<00:00,  6.49it/s, loss=0.415]


Epoch 2 Loss: 18.1194
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  6.40it/s, loss=0.131] 


Epoch 3 Loss: 13.2270
Test Metrics: Precision=0.9080, Recall=0.9080, F1=0.9080

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 9077.17 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7281.50 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 8914.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.08it/s, loss=0.478]


Epoch 1 Loss: 31.9227
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.05it/s, loss=0.234]


Epoch 2 Loss: 15.7206
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:28<00:00,  2.03it/s, loss=0.202] 


Epoch 3 Loss: 10.5715
Test Metrics: Precision=0.9189, Recall=0.9189, F1=0.9189

Fine-tuning roberta-large (large) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 8999.90 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7190.86 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9192.13 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:26<00:00,  2.19it/s, loss=0.394]


Epoch 1 Loss: 28.4796
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:25<00:00,  2.27it/s, loss=0.285]


Epoch 2 Loss: 13.3354
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.21it/s, loss=0.134] 


Epoch 3 Loss: 9.1711
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning bert-base-cased (small) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 9096.64 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7332.14 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9100.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  6.22it/s, loss=0.307]


Epoch 1 Loss: 35.1765
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  6.06it/s, loss=0.294]


Epoch 2 Loss: 18.2365
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  6.22it/s, loss=0.139]


Epoch 3 Loss: 12.6279
Test Metrics: Precision=0.9085, Recall=0.9085, F1=0.9085

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 8674.92 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6968.54 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9136.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.02it/s, loss=0.365]


Epoch 1 Loss: 31.3056
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.09it/s, loss=0.545]


Epoch 2 Loss: 15.7051
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:28<00:00,  2.01it/s, loss=0.212] 


Epoch 3 Loss: 11.1342
Test Metrics: Precision=0.9155, Recall=0.9155, F1=0.9155

Fine-tuning roberta-large (large) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 9236.61 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7302.72 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9401.15 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:26<00:00,  2.16it/s, loss=0.174]


Epoch 1 Loss: 27.2580
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.15it/s, loss=0.0519]


Epoch 2 Loss: 12.1876
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.18it/s, loss=0.155] 


Epoch 3 Loss: 7.9036
Test Metrics: Precision=0.9339, Recall=0.9339, F1=0.9339

Fine-tuning bert-base-cased (small) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8630.33 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7965.07 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9254.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  5.92it/s, loss=0.357]


Epoch 1 Loss: 36.1283
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  6.02it/s, loss=0.321]


Epoch 2 Loss: 17.4140
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  5.92it/s, loss=0.272] 


Epoch 3 Loss: 12.8463
Test Metrics: Precision=0.9088, Recall=0.9088, F1=0.9088

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8800.83 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7998.09 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9315.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:30<00:00,  1.92it/s, loss=0.163]


Epoch 1 Loss: 30.1030
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:30<00:00,  1.91it/s, loss=0.516]


Epoch 2 Loss: 14.3053
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.95it/s, loss=0.274] 


Epoch 3 Loss: 9.3608
Test Metrics: Precision=0.9185, Recall=0.9185, F1=0.9185

Fine-tuning roberta-large (large) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8669.38 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7701.65 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9194.15 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.08it/s, loss=0.366]


Epoch 1 Loss: 27.9086
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.12it/s, loss=0.398]


Epoch 2 Loss: 13.6613
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:27<00:00,  2.12it/s, loss=0.183] 


Epoch 3 Loss: 9.2238
Test Metrics: Precision=0.9326, Recall=0.9326, F1=0.9326

Fine-tuning bert-base-cased (small) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 8510.08 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7969.84 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9086.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  6.22it/s, loss=0.455]


Epoch 1 Loss: 33.9042
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  6.13it/s, loss=0.198]


Epoch 2 Loss: 16.6827
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  6.02it/s, loss=0.276] 


Epoch 3 Loss: 12.2676
Test Metrics: Precision=0.9076, Recall=0.9076, F1=0.9076

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 8424.03 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7742.29 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9248.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.06it/s, loss=0.398]


Epoch 1 Loss: 32.2510
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:29<00:00,  1.95it/s, loss=0.344]


Epoch 2 Loss: 16.8693
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.96it/s, loss=0.144]


Epoch 3 Loss: 11.7415
Test Metrics: Precision=0.9135, Recall=0.9135, F1=0.9135

Fine-tuning roberta-large (large) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 9081.44 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7623.14 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9374.91 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.13it/s, loss=0.261]


Epoch 1 Loss: 30.1766
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:26<00:00,  2.16it/s, loss=0.226]


Epoch 2 Loss: 12.9662
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:27<00:00,  2.15it/s, loss=0.275] 


Epoch 3 Loss: 8.7319
Test Metrics: Precision=0.9306, Recall=0.9306, F1=0.9306

Fine-tuning bert-base-cased (small) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 8697.56 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7364.89 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9489.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  5.99it/s, loss=0.319]


Epoch 1 Loss: 35.8654
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  5.92it/s, loss=0.274]


Epoch 2 Loss: 17.4300
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  5.86it/s, loss=0.177] 


Epoch 3 Loss: 12.9423
Test Metrics: Precision=0.9105, Recall=0.9105, F1=0.9105

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 8796.42 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7106.24 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9261.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:29<00:00,  1.94it/s, loss=0.483]


Epoch 1 Loss: 31.4855
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:30<00:00,  1.91it/s, loss=0.17] 


Epoch 2 Loss: 18.1471
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:30<00:00,  1.90it/s, loss=0.193]


Epoch 3 Loss: 13.3878
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning roberta-large (large) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 8956.95 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7235.90 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9562.64 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.01it/s, loss=0.371]


Epoch 1 Loss: 28.9494
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.05it/s, loss=0.0987]


Epoch 2 Loss: 15.7691
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.98it/s, loss=0.119]


Epoch 3 Loss: 10.6691
Test Metrics: Precision=0.9328, Recall=0.9328, F1=0.9328

Fine-tuning bert-base-cased (small) with Train Size 460, Split 6...


Map: 100%|██████████| 460/460 [00:00<00:00, 9010.03 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6910.39 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 3290.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  6.26it/s, loss=0.397]


Epoch 1 Loss: 36.1724
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  6.17it/s, loss=0.277]


Epoch 2 Loss: 18.0342
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  6.01it/s, loss=0.236] 


Epoch 3 Loss: 13.0722
Test Metrics: Precision=0.9069, Recall=0.9069, F1=0.9069

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 6...


Map: 100%|██████████| 460/460 [00:00<00:00, 8795.98 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6666.71 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9356.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.01it/s, loss=0.547]


Epoch 1 Loss: 32.2163
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:29<00:00,  1.97it/s, loss=0.296]


Epoch 2 Loss: 16.1156
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.97it/s, loss=0.0974]


Epoch 3 Loss: 11.4669
Test Metrics: Precision=0.9179, Recall=0.9179, F1=0.9179

Fine-tuning roberta-large (large) with Train Size 460, Split 6...


Map: 100%|██████████| 460/460 [00:00<00:00, 9071.92 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6830.15 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9349.27 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.12it/s, loss=0.292]


Epoch 1 Loss: 28.7848
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.11it/s, loss=0.366] 


Epoch 2 Loss: 13.3871
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.17it/s, loss=0.144] 


Epoch 3 Loss: 9.2042
Test Metrics: Precision=0.9347, Recall=0.9347, F1=0.9347

Fine-tuning bert-base-cased (small) with Train Size 460, Split 7...


Map: 100%|██████████| 460/460 [00:00<00:00, 8520.64 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7562.93 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9423.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  5.95it/s, loss=0.9]  


Epoch 1 Loss: 36.1862
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  5.81it/s, loss=0.363]


Epoch 2 Loss: 19.0527
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  6.03it/s, loss=0.221] 


Epoch 3 Loss: 13.2876
Test Metrics: Precision=0.9112, Recall=0.9112, F1=0.9112

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 7...


Map: 100%|██████████| 460/460 [00:00<00:00, 2586.68 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7653.54 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9291.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:29<00:00,  1.96it/s, loss=0.654]


Epoch 1 Loss: 32.9270
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:30<00:00,  1.93it/s, loss=0.232]


Epoch 2 Loss: 17.0429
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:30<00:00,  1.90it/s, loss=0.237]


Epoch 3 Loss: 11.9481
Test Metrics: Precision=0.9188, Recall=0.9188, F1=0.9188

Fine-tuning roberta-large (large) with Train Size 460, Split 7...


Map: 100%|██████████| 460/460 [00:00<00:00, 8760.03 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7415.42 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9508.76 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:27<00:00,  2.08it/s, loss=0.9]  


Epoch 1 Loss: 29.8642
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.02it/s, loss=0.389]


Epoch 2 Loss: 15.7158
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:28<00:00,  2.01it/s, loss=0.235] 


Epoch 3 Loss: 11.6117
Test Metrics: Precision=0.9309, Recall=0.9309, F1=0.9309

Fine-tuning bert-base-cased (small) with Train Size 460, Split 8...


Map: 100%|██████████| 460/460 [00:00<00:00, 8743.76 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7752.09 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9104.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:10<00:00,  5.76it/s, loss=0.428]


Epoch 1 Loss: 35.3799
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:10<00:00,  5.72it/s, loss=0.187]


Epoch 2 Loss: 18.2369
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:10<00:00,  5.60it/s, loss=0.128]


Epoch 3 Loss: 13.0117
Test Metrics: Precision=0.9106, Recall=0.9106, F1=0.9106

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 8...


Map: 100%|██████████| 460/460 [00:00<00:00, 8541.50 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7887.41 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9129.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:30<00:00,  1.88it/s, loss=0.324]


Epoch 1 Loss: 31.1241
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:30<00:00,  1.89it/s, loss=0.218] 


Epoch 2 Loss: 14.8657
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:30<00:00,  1.93it/s, loss=0.132] 


Epoch 3 Loss: 10.0513
Test Metrics: Precision=0.9222, Recall=0.9222, F1=0.9222

Fine-tuning roberta-large (large) with Train Size 460, Split 8...


Map: 100%|██████████| 460/460 [00:00<00:00, 9003.80 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8037.41 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9544.17 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.01it/s, loss=0.229]


Epoch 1 Loss: 30.2026
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.03it/s, loss=0.344]


Epoch 2 Loss: 15.1850
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.98it/s, loss=0.176] 


Epoch 3 Loss: 10.9329
Test Metrics: Precision=0.9290, Recall=0.9290, F1=0.9290

Fine-tuning bert-base-cased (small) with Train Size 460, Split 9...


Map: 100%|██████████| 460/460 [00:00<00:00, 8606.39 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8090.66 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9093.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  6.11it/s, loss=0.27] 


Epoch 1 Loss: 35.7582
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  6.07it/s, loss=0.268]


Epoch 2 Loss: 17.2081
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  5.92it/s, loss=0.147] 


Epoch 3 Loss: 12.0785
Test Metrics: Precision=0.9095, Recall=0.9095, F1=0.9095

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 9...


Map: 100%|██████████| 460/460 [00:00<00:00, 8721.70 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8165.13 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9109.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:29<00:00,  1.96it/s, loss=0.272]


Epoch 1 Loss: 32.0274
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:28<00:00,  2.02it/s, loss=0.262]


Epoch 2 Loss: 15.8555
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.98it/s, loss=0.547] 


Epoch 3 Loss: 10.8378
Test Metrics: Precision=0.9164, Recall=0.9164, F1=0.9164

Fine-tuning roberta-large (large) with Train Size 460, Split 9...


Map: 100%|██████████| 460/460 [00:00<00:00, 8676.13 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8014.21 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9384.65 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:26<00:00,  2.19it/s, loss=0.25] 


Epoch 1 Loss: 26.6208
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:27<00:00,  2.12it/s, loss=0.191]


Epoch 2 Loss: 13.3540
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:26<00:00,  2.16it/s, loss=0.105] 


Epoch 3 Loss: 9.0494
Test Metrics: Precision=0.9332, Recall=0.9332, F1=0.9332

Fine-tuning bert-base-cased (small) with Train Size 460, Split 10...


Map: 100%|██████████| 460/460 [00:00<00:00, 8413.85 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7652.32 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9293.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:09<00:00,  5.99it/s, loss=0.549]


Epoch 1 Loss: 34.0516
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:09<00:00,  6.15it/s, loss=0.337]


Epoch 2 Loss: 17.1236
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:09<00:00,  5.92it/s, loss=0.272] 


Epoch 3 Loss: 12.2520
Test Metrics: Precision=0.9092, Recall=0.9092, F1=0.9092

Fine-tuning bert-large-cased (medium) with Train Size 460, Split 10...


Map: 100%|██████████| 460/460 [00:00<00:00, 8327.56 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7854.50 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9022.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:31<00:00,  1.86it/s, loss=0.264]


Epoch 1 Loss: 30.7286
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:30<00:00,  1.92it/s, loss=0.222]


Epoch 2 Loss: 14.1485
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:29<00:00,  1.95it/s, loss=0.0804]


Epoch 3 Loss: 9.8795
Test Metrics: Precision=0.9189, Recall=0.9189, F1=0.9189

Fine-tuning roberta-large (large) with Train Size 460, Split 10...


Map: 100%|██████████| 460/460 [00:00<00:00, 8826.76 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8192.35 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9598.20 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:28<00:00,  2.06it/s, loss=0.249]


Epoch 1 Loss: 28.2771
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:29<00:00,  1.99it/s, loss=0.224]


Epoch 2 Loss: 14.0349
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:27<00:00,  2.08it/s, loss=0.123] 


Epoch 3 Loss: 9.3687
Test Metrics: Precision=0.9333, Recall=0.9333, F1=0.9333

Fine-tuning bert-base-cased (small) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 8791.32 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7433.69 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 8853.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:09<00:00,  6.20it/s, loss=0.379]


Epoch 1 Loss: 37.5886
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:09<00:00,  6.33it/s, loss=0.291]


Epoch 2 Loss: 18.8930
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:09<00:00,  6.41it/s, loss=0.171]


Epoch 3 Loss: 13.8875
Test Metrics: Precision=0.9053, Recall=0.9053, F1=0.9053

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 8991.13 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7385.69 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9230.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.02it/s, loss=0.426]


Epoch 1 Loss: 31.0985
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.10it/s, loss=0.159]


Epoch 2 Loss: 15.4381
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.183] 


Epoch 3 Loss: 10.6472
Test Metrics: Precision=0.9169, Recall=0.9169, F1=0.9169

Fine-tuning roberta-large (large) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 9009.80 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7593.93 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9387.57 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.21it/s, loss=0.294]


Epoch 1 Loss: 30.0112
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.19it/s, loss=0.291]


Epoch 2 Loss: 14.1528
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:26<00:00,  2.23it/s, loss=0.0935]


Epoch 3 Loss: 9.3970
Test Metrics: Precision=0.9301, Recall=0.9301, F1=0.9301

Fine-tuning bert-base-cased (small) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 8977.61 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7943.13 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 3349.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:09<00:00,  6.10it/s, loss=0.287]


Epoch 1 Loss: 37.7425
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:09<00:00,  6.41it/s, loss=0.209] 


Epoch 2 Loss: 17.4358
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:09<00:00,  6.29it/s, loss=0.135] 


Epoch 3 Loss: 12.6391
Test Metrics: Precision=0.9121, Recall=0.9121, F1=0.9121

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 8877.11 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7814.11 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 8885.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:30<00:00,  1.97it/s, loss=0.414]


Epoch 1 Loss: 32.1548
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.00it/s, loss=0.331] 


Epoch 2 Loss: 15.1011
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.225] 


Epoch 3 Loss: 9.5986
Test Metrics: Precision=0.9168, Recall=0.9168, F1=0.9168

Fine-tuning roberta-large (large) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 9274.39 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8126.53 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9216.08 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.17it/s, loss=0.234]


Epoch 1 Loss: 30.0481
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.16it/s, loss=0.171]


Epoch 2 Loss: 14.0850
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.14it/s, loss=0.128] 


Epoch 3 Loss: 9.0524
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning bert-base-cased (small) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8299.46 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7371.36 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9460.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:10<00:00,  5.81it/s, loss=0.359]


Epoch 1 Loss: 35.7596
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:10<00:00,  5.67it/s, loss=0.236]


Epoch 2 Loss: 17.8653
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:10<00:00,  5.82it/s, loss=0.226] 


Epoch 3 Loss: 12.4892
Test Metrics: Precision=0.9087, Recall=0.9087, F1=0.9087

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8370.16 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7497.36 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9594.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:30<00:00,  1.96it/s, loss=0.379]


Epoch 1 Loss: 30.2283
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:31<00:00,  1.90it/s, loss=0.325]


Epoch 2 Loss: 14.6535
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:31<00:00,  1.90it/s, loss=0.192] 


Epoch 3 Loss: 10.1545
Test Metrics: Precision=0.9171, Recall=0.9171, F1=0.9171

Fine-tuning roberta-large (large) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8601.72 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7268.76 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9628.44 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.459]


Epoch 1 Loss: 31.2815
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.197]


Epoch 2 Loss: 14.4671
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.02it/s, loss=0.167] 


Epoch 3 Loss: 10.3248
Test Metrics: Precision=0.9282, Recall=0.9282, F1=0.9282

Fine-tuning bert-base-cased (small) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 8636.70 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7324.16 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9165.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:09<00:00,  6.14it/s, loss=0.308]


Epoch 1 Loss: 35.0335
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:09<00:00,  6.13it/s, loss=0.242]


Epoch 2 Loss: 17.5001
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:09<00:00,  6.22it/s, loss=0.235] 


Epoch 3 Loss: 12.1261
Test Metrics: Precision=0.9096, Recall=0.9096, F1=0.9096

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 8607.19 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7158.53 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9120.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:30<00:00,  1.97it/s, loss=0.284]


Epoch 1 Loss: 30.5683
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:30<00:00,  1.96it/s, loss=0.129]


Epoch 2 Loss: 14.4054
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:30<00:00,  2.00it/s, loss=0.105] 


Epoch 3 Loss: 8.8390
Test Metrics: Precision=0.9230, Recall=0.9230, F1=0.9230

Fine-tuning roberta-large (large) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 8877.50 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7580.92 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9468.40 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.13it/s, loss=0.266]


Epoch 1 Loss: 28.9898
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:27<00:00,  2.14it/s, loss=0.291]


Epoch 2 Loss: 16.7290
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:27<00:00,  2.15it/s, loss=0.153] 


Epoch 3 Loss: 11.8623
Test Metrics: Precision=0.9262, Recall=0.9262, F1=0.9262

Fine-tuning bert-base-cased (small) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8754.89 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8278.06 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9228.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:10<00:00,  5.96it/s, loss=0.31] 


Epoch 1 Loss: 35.2276
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:10<00:00,  5.73it/s, loss=0.254]


Epoch 2 Loss: 17.8412
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:10<00:00,  5.72it/s, loss=0.16]  


Epoch 3 Loss: 12.1517
Test Metrics: Precision=0.9163, Recall=0.9163, F1=0.9163

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8678.54 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8152.19 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9209.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:31<00:00,  1.88it/s, loss=0.299]


Epoch 1 Loss: 33.3074
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:30<00:00,  1.97it/s, loss=0.269] 


Epoch 2 Loss: 16.2279
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:31<00:00,  1.89it/s, loss=0.106] 


Epoch 3 Loss: 11.3012
Test Metrics: Precision=0.9191, Recall=0.9191, F1=0.9191

Fine-tuning roberta-large (large) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8884.47 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8065.00 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9481.87 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.29] 


Epoch 1 Loss: 31.0863
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.09it/s, loss=0.261]


Epoch 2 Loss: 13.8024
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.08it/s, loss=0.113] 


Epoch 3 Loss: 9.5779
Test Metrics: Precision=0.9343, Recall=0.9343, F1=0.9343

Fine-tuning bert-base-cased (small) with Train Size 480, Split 6...


Map: 100%|██████████| 480/480 [00:00<00:00, 8945.86 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 6968.97 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9130.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:09<00:00,  6.03it/s, loss=0.309]


Epoch 1 Loss: 36.8241
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:09<00:00,  6.11it/s, loss=0.513]


Epoch 2 Loss: 17.8937
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:09<00:00,  6.05it/s, loss=0.181]


Epoch 3 Loss: 14.3750
Test Metrics: Precision=0.9107, Recall=0.9107, F1=0.9107

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 6...


Map: 100%|██████████| 480/480 [00:00<00:00, 8895.03 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7157.39 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9095.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:30<00:00,  1.95it/s, loss=0.396]


Epoch 1 Loss: 28.8843
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:31<00:00,  1.92it/s, loss=0.267]


Epoch 2 Loss: 14.1074
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:30<00:00,  1.95it/s, loss=0.143] 


Epoch 3 Loss: 8.9043
Test Metrics: Precision=0.9229, Recall=0.9229, F1=0.9229

Fine-tuning roberta-large (large) with Train Size 480, Split 6...


Map: 100%|██████████| 480/480 [00:00<00:00, 9035.23 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 6934.41 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9115.28 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:27<00:00,  2.18it/s, loss=0.233]


Epoch 1 Loss: 32.5390
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.406]


Epoch 2 Loss: 15.2522
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.09it/s, loss=0.169] 


Epoch 3 Loss: 10.7512
Test Metrics: Precision=0.9313, Recall=0.9313, F1=0.9313

Fine-tuning bert-base-cased (small) with Train Size 480, Split 7...


Map: 100%|██████████| 480/480 [00:00<00:00, 8438.50 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8127.02 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9098.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:10<00:00,  5.85it/s, loss=0.301]


Epoch 1 Loss: 36.3138
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:10<00:00,  5.87it/s, loss=0.209]


Epoch 2 Loss: 17.9884
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:10<00:00,  5.81it/s, loss=0.142]


Epoch 3 Loss: 12.6293
Test Metrics: Precision=0.9134, Recall=0.9134, F1=0.9134

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 7...


Map: 100%|██████████| 480/480 [00:00<00:00, 8540.45 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8491.03 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9137.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:31<00:00,  1.89it/s, loss=0.412]


Epoch 1 Loss: 33.6180
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:30<00:00,  1.95it/s, loss=0.15]  


Epoch 2 Loss: 15.8331
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:31<00:00,  1.93it/s, loss=0.216] 


Epoch 3 Loss: 10.5567
Test Metrics: Precision=0.9189, Recall=0.9189, F1=0.9189

Fine-tuning roberta-large (large) with Train Size 480, Split 7...


Map: 100%|██████████| 480/480 [00:00<00:00, 8843.42 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8390.53 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9343.94 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.372]


Epoch 1 Loss: 27.5750
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.03it/s, loss=0.189] 


Epoch 2 Loss: 13.2727
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:29<00:00,  2.04it/s, loss=0.124] 


Epoch 3 Loss: 8.4223
Test Metrics: Precision=0.9345, Recall=0.9345, F1=0.9345

Fine-tuning bert-base-cased (small) with Train Size 480, Split 8...


Map: 100%|██████████| 480/480 [00:00<00:00, 8574.86 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7510.50 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9114.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:10<00:00,  5.56it/s, loss=0.455]


Epoch 1 Loss: 37.2613
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:10<00:00,  5.61it/s, loss=0.255]


Epoch 2 Loss: 18.1692
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:10<00:00,  5.78it/s, loss=0.192]


Epoch 3 Loss: 12.9939
Test Metrics: Precision=0.9107, Recall=0.9107, F1=0.9107

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 8...


Map: 100%|██████████| 480/480 [00:00<00:00, 8779.17 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7627.16 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9394.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:32<00:00,  1.84it/s, loss=0.315]


Epoch 1 Loss: 36.9775
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:32<00:00,  1.86it/s, loss=0.358]


Epoch 2 Loss: 23.9057
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:32<00:00,  1.86it/s, loss=0.439]


Epoch 3 Loss: 18.6956
Test Metrics: Precision=0.8966, Recall=0.8966, F1=0.8966

Fine-tuning roberta-large (large) with Train Size 480, Split 8...


Map: 100%|██████████| 480/480 [00:00<00:00, 8778.98 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7455.71 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9375.12 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.02it/s, loss=0.351]


Epoch 1 Loss: 27.7118
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.07it/s, loss=0.209]


Epoch 2 Loss: 12.8233
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:30<00:00,  1.97it/s, loss=0.12]  


Epoch 3 Loss: 9.0153
Test Metrics: Precision=0.9341, Recall=0.9341, F1=0.9341

Fine-tuning bert-base-cased (small) with Train Size 480, Split 9...


Map: 100%|██████████| 480/480 [00:00<00:00, 2687.93 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8474.59 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9308.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:10<00:00,  5.97it/s, loss=0.455]


Epoch 1 Loss: 33.8993
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:09<00:00,  6.08it/s, loss=0.263]


Epoch 2 Loss: 16.4436
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:10<00:00,  5.96it/s, loss=0.192] 


Epoch 3 Loss: 11.7537
Test Metrics: Precision=0.9062, Recall=0.9062, F1=0.9062

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 9...


Map: 100%|██████████| 480/480 [00:00<00:00, 8547.01 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8033.78 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9236.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:31<00:00,  1.93it/s, loss=0.252]


Epoch 1 Loss: 29.1410
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:30<00:00,  1.95it/s, loss=0.223]


Epoch 2 Loss: 13.7384
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:31<00:00,  1.93it/s, loss=0.237] 


Epoch 3 Loss: 8.8607
Test Metrics: Precision=0.9183, Recall=0.9183, F1=0.9183

Fine-tuning roberta-large (large) with Train Size 480, Split 9...


Map: 100%|██████████| 480/480 [00:00<00:00, 8698.60 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8336.85 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9538.70 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:28<00:00,  2.10it/s, loss=0.332]


Epoch 1 Loss: 28.0701
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:28<00:00,  2.10it/s, loss=0.153]


Epoch 2 Loss: 13.1344
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.07it/s, loss=0.154] 


Epoch 3 Loss: 8.7075
Test Metrics: Precision=0.9322, Recall=0.9322, F1=0.9322

Fine-tuning bert-base-cased (small) with Train Size 480, Split 10...


Map: 100%|██████████| 480/480 [00:00<00:00, 8348.78 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8491.93 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9121.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:10<00:00,  5.94it/s, loss=0.388]


Epoch 1 Loss: 36.4145
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:10<00:00,  5.90it/s, loss=0.19] 


Epoch 2 Loss: 18.0673
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:10<00:00,  5.94it/s, loss=0.185] 


Epoch 3 Loss: 13.0990
Test Metrics: Precision=0.9105, Recall=0.9105, F1=0.9105

Fine-tuning bert-large-cased (medium) with Train Size 480, Split 10...


Map: 100%|██████████| 480/480 [00:00<00:00, 8352.55 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8689.67 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9310.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:31<00:00,  1.90it/s, loss=0.273]


Epoch 1 Loss: 30.4629
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:31<00:00,  1.90it/s, loss=0.209]


Epoch 2 Loss: 14.1685
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:31<00:00,  1.88it/s, loss=0.227] 


Epoch 3 Loss: 8.9579
Test Metrics: Precision=0.9218, Recall=0.9218, F1=0.9218

Fine-tuning roberta-large (large) with Train Size 480, Split 10...


Map: 100%|██████████| 480/480 [00:00<00:00, 8694.77 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8708.46 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9607.34 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:29<00:00,  2.01it/s, loss=0.402]


Epoch 1 Loss: 28.3107
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:29<00:00,  2.05it/s, loss=0.237]


Epoch 2 Loss: 13.1275
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:28<00:00,  2.08it/s, loss=0.122] 


Epoch 3 Loss: 8.8958
Test Metrics: Precision=0.9351, Recall=0.9351, F1=0.9351

Fine-tuning bert-base-cased (small) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 8638.18 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7456.14 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9040.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:09<00:00,  6.45it/s, loss=0.649]


Epoch 1 Loss: 37.0915
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:09<00:00,  6.42it/s, loss=0.103]


Epoch 2 Loss: 17.7207
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:09<00:00,  6.34it/s, loss=0.107] 


Epoch 3 Loss: 12.6855
Test Metrics: Precision=0.9105, Recall=0.9105, F1=0.9105

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 8532.64 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7826.65 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9034.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:29<00:00,  2.11it/s, loss=0.169]


Epoch 1 Loss: 33.8719
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.06it/s, loss=0.251]


Epoch 2 Loss: 17.0021
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.09it/s, loss=0.12]  


Epoch 3 Loss: 12.8218
Test Metrics: Precision=0.9171, Recall=0.9171, F1=0.9171

Fine-tuning roberta-large (large) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 9026.41 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7400.62 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9199.12 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:28<00:00,  2.25it/s, loss=0.515]


Epoch 1 Loss: 33.1275
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:27<00:00,  2.30it/s, loss=0.119]


Epoch 2 Loss: 14.7119
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:27<00:00,  2.27it/s, loss=0.141] 


Epoch 3 Loss: 10.1696
Test Metrics: Precision=0.9329, Recall=0.9329, F1=0.9329

Fine-tuning bert-base-cased (small) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 8917.45 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7930.99 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8865.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  6.26it/s, loss=0.316]


Epoch 1 Loss: 36.9040
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:09<00:00,  6.40it/s, loss=0.243]


Epoch 2 Loss: 18.6204
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:09<00:00,  6.50it/s, loss=0.369] 


Epoch 3 Loss: 12.9738
Test Metrics: Precision=0.9086, Recall=0.9086, F1=0.9086

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 9428.96 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8124.56 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9020.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.04it/s, loss=0.169]


Epoch 1 Loss: 31.7076
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:31<00:00,  1.98it/s, loss=0.208] 


Epoch 2 Loss: 15.5862
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.04it/s, loss=0.122] 


Epoch 3 Loss: 10.3902
Test Metrics: Precision=0.9167, Recall=0.9167, F1=0.9167

Fine-tuning roberta-large (large) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 9177.35 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8037.84 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9035.66 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:28<00:00,  2.22it/s, loss=0.153]


Epoch 1 Loss: 28.2893
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:29<00:00,  2.15it/s, loss=0.0653]


Epoch 2 Loss: 13.3484
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.16it/s, loss=0.0847]


Epoch 3 Loss: 8.8427
Test Metrics: Precision=0.9362, Recall=0.9362, F1=0.9362

Fine-tuning bert-base-cased (small) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 8329.57 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8437.04 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9283.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  5.92it/s, loss=0.246]


Epoch 1 Loss: 36.1415
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  5.78it/s, loss=0.177]


Epoch 2 Loss: 18.6167
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  6.04it/s, loss=0.197] 


Epoch 3 Loss: 13.4641
Test Metrics: Precision=0.9030, Recall=0.9030, F1=0.9030

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 8752.65 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8402.89 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9388.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:32<00:00,  1.91it/s, loss=0.312]


Epoch 1 Loss: 32.4229
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:33<00:00,  1.91it/s, loss=0.187]


Epoch 2 Loss: 17.0501
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:32<00:00,  1.97it/s, loss=0.203] 


Epoch 3 Loss: 11.5773
Test Metrics: Precision=0.9130, Recall=0.9130, F1=0.9130

Fine-tuning roberta-large (large) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 8500.06 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8491.18 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9472.97 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.343]


Epoch 1 Loss: 31.4187
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.09it/s, loss=0.286]


Epoch 2 Loss: 15.0847
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.06it/s, loss=0.232] 


Epoch 3 Loss: 11.1816
Test Metrics: Precision=0.9309, Recall=0.9309, F1=0.9309

Fine-tuning bert-base-cased (small) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 8590.20 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7621.30 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9088.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  6.15it/s, loss=0.388]


Epoch 1 Loss: 38.0097
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  6.16it/s, loss=0.355]


Epoch 2 Loss: 18.0114
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  6.09it/s, loss=0.297] 


Epoch 3 Loss: 13.6879
Test Metrics: Precision=0.9093, Recall=0.9093, F1=0.9093

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 8761.97 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7446.61 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9102.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:31<00:00,  1.98it/s, loss=0.653]


Epoch 1 Loss: 37.4448
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:31<00:00,  1.99it/s, loss=0.271]


Epoch 2 Loss: 19.7947
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:31<00:00,  2.01it/s, loss=0.224]


Epoch 3 Loss: 14.8879
Test Metrics: Precision=0.9072, Recall=0.9072, F1=0.9072

Fine-tuning roberta-large (large) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 2630.21 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7063.14 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9276.68 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:29<00:00,  2.17it/s, loss=0.274]


Epoch 1 Loss: 29.9808
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:29<00:00,  2.13it/s, loss=0.145]


Epoch 2 Loss: 14.1952
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.15it/s, loss=0.227] 


Epoch 3 Loss: 10.2135
Test Metrics: Precision=0.9309, Recall=0.9309, F1=0.9309

Fine-tuning bert-base-cased (small) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 8965.98 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8861.27 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8887.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  5.94it/s, loss=0.28] 


Epoch 1 Loss: 37.3274
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  5.99it/s, loss=0.185]


Epoch 2 Loss: 19.1424
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  5.86it/s, loss=0.143] 


Epoch 3 Loss: 13.5738
Test Metrics: Precision=0.9112, Recall=0.9112, F1=0.9112

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 8658.44 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8665.74 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8984.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:32<00:00,  1.92it/s, loss=0.254]


Epoch 1 Loss: 33.9216
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:32<00:00,  1.95it/s, loss=0.374]


Epoch 2 Loss: 17.1992
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:32<00:00,  1.93it/s, loss=0.199] 


Epoch 3 Loss: 11.0689
Test Metrics: Precision=0.9189, Recall=0.9189, F1=0.9189

Fine-tuning roberta-large (large) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 9039.76 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8802.50 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9353.14 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.03it/s, loss=0.174]


Epoch 1 Loss: 29.3913
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:31<00:00,  2.01it/s, loss=0.352]


Epoch 2 Loss: 14.4753
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.0805]


Epoch 3 Loss: 9.8041
Test Metrics: Precision=0.9338, Recall=0.9338, F1=0.9338

Fine-tuning bert-base-cased (small) with Train Size 500, Split 6...


Map: 100%|██████████| 500/500 [00:00<00:00, 8998.88 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7032.94 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9211.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  6.09it/s, loss=0.587]


Epoch 1 Loss: 37.7585
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  6.05it/s, loss=0.196]


Epoch 2 Loss: 18.7345
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  6.20it/s, loss=0.167] 


Epoch 3 Loss: 13.6218
Test Metrics: Precision=0.9121, Recall=0.9121, F1=0.9121

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 6...


Map: 100%|██████████| 500/500 [00:00<00:00, 9207.77 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7294.70 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8965.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:31<00:00,  2.02it/s, loss=0.513]


Epoch 1 Loss: 34.0187
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:31<00:00,  1.99it/s, loss=0.0987]


Epoch 2 Loss: 17.2445
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.04it/s, loss=0.11]  


Epoch 3 Loss: 11.1250
Test Metrics: Precision=0.9165, Recall=0.9165, F1=0.9165

Fine-tuning roberta-large (large) with Train Size 500, Split 6...


Map: 100%|██████████| 500/500 [00:00<00:00, 9483.54 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7727.45 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9393.38 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.06it/s, loss=0.221]


Epoch 1 Loss: 30.8285
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:29<00:00,  2.12it/s, loss=0.232]


Epoch 2 Loss: 15.1095
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.12it/s, loss=0.149] 


Epoch 3 Loss: 10.6663
Test Metrics: Precision=0.9317, Recall=0.9317, F1=0.9317

Fine-tuning bert-base-cased (small) with Train Size 500, Split 7...


Map: 100%|██████████| 500/500 [00:00<00:00, 8529.20 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 6773.53 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9416.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  6.05it/s, loss=0.861]


Epoch 1 Loss: 37.5869
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  5.83it/s, loss=0.226]


Epoch 2 Loss: 19.2300
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  5.90it/s, loss=0.167]


Epoch 3 Loss: 13.7544
Test Metrics: Precision=0.9126, Recall=0.9126, F1=0.9126

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 7...


Map: 100%|██████████| 500/500 [00:00<00:00, 8505.23 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7031.64 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9665.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:32<00:00,  1.94it/s, loss=0.139]


Epoch 1 Loss: 32.8486
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:32<00:00,  1.92it/s, loss=0.172]


Epoch 2 Loss: 17.0985
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:32<00:00,  1.94it/s, loss=0.202] 


Epoch 3 Loss: 11.6422
Test Metrics: Precision=0.9188, Recall=0.9188, F1=0.9188

Fine-tuning roberta-large (large) with Train Size 500, Split 7...


Map: 100%|██████████| 500/500 [00:00<00:00, 8765.45 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7116.71 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9846.39 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:31<00:00,  2.03it/s, loss=0.248]


Epoch 1 Loss: 28.9964
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.05it/s, loss=0.148] 


Epoch 2 Loss: 13.5075
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:31<00:00,  2.03it/s, loss=0.191] 


Epoch 3 Loss: 9.5072
Test Metrics: Precision=0.9336, Recall=0.9336, F1=0.9336

Fine-tuning bert-base-cased (small) with Train Size 500, Split 8...


Map: 100%|██████████| 500/500 [00:00<00:00, 8583.91 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7111.40 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9482.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  5.83it/s, loss=0.304]


Epoch 1 Loss: 40.1095
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  5.85it/s, loss=0.284]


Epoch 2 Loss: 20.1604
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:11<00:00,  5.69it/s, loss=0.427]


Epoch 3 Loss: 15.3217
Test Metrics: Precision=0.9063, Recall=0.9063, F1=0.9063

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 8...


Map: 100%|██████████| 500/500 [00:00<00:00, 8528.75 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 6830.78 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9223.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:32<00:00,  1.92it/s, loss=0.474]


Epoch 1 Loss: 31.1685
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:33<00:00,  1.90it/s, loss=0.456]


Epoch 2 Loss: 15.6335
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:33<00:00,  1.90it/s, loss=0.0704]


Epoch 3 Loss: 9.3699
Test Metrics: Precision=0.9233, Recall=0.9233, F1=0.9233

Fine-tuning roberta-large (large) with Train Size 500, Split 8...


Map: 100%|██████████| 500/500 [00:00<00:00, 8542.51 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 6111.92 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9130.61 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:31<00:00,  2.03it/s, loss=0.476]


Epoch 1 Loss: 29.1134
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:31<00:00,  2.00it/s, loss=0.192]


Epoch 2 Loss: 14.8184
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.10it/s, loss=0.105] 


Epoch 3 Loss: 10.3055
Test Metrics: Precision=0.9319, Recall=0.9319, F1=0.9319

Fine-tuning bert-base-cased (small) with Train Size 500, Split 9...


Map: 100%|██████████| 500/500 [00:00<00:00, 8542.23 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7868.80 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9385.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  6.07it/s, loss=0.221]


Epoch 1 Loss: 36.2427
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  6.00it/s, loss=0.196]


Epoch 2 Loss: 17.6444
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  6.08it/s, loss=0.159] 


Epoch 3 Loss: 12.6928
Test Metrics: Precision=0.9079, Recall=0.9079, F1=0.9079

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 9...


Map: 100%|██████████| 500/500 [00:00<00:00, 8563.75 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7772.99 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9272.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:32<00:00,  1.97it/s, loss=0.186]


Epoch 1 Loss: 31.4131
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:32<00:00,  1.93it/s, loss=0.121] 


Epoch 2 Loss: 14.4020
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:32<00:00,  1.94it/s, loss=0.0973]


Epoch 3 Loss: 9.2112
Test Metrics: Precision=0.9154, Recall=0.9154, F1=0.9154

Fine-tuning roberta-large (large) with Train Size 500, Split 9...


Map: 100%|██████████| 500/500 [00:00<00:00, 8757.03 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7721.61 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9663.92 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:29<00:00,  2.10it/s, loss=0.333]


Epoch 1 Loss: 28.1221
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:29<00:00,  2.16it/s, loss=0.165]


Epoch 2 Loss: 14.9957
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:29<00:00,  2.16it/s, loss=0.0628]


Epoch 3 Loss: 9.9603
Test Metrics: Precision=0.9293, Recall=0.9293, F1=0.9293

Fine-tuning bert-base-cased (small) with Train Size 500, Split 10...


Map: 100%|██████████| 500/500 [00:00<00:00, 8414.15 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7509.68 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9128.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:10<00:00,  5.95it/s, loss=0.719]


Epoch 1 Loss: 35.8831
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:10<00:00,  5.95it/s, loss=0.254]


Epoch 2 Loss: 18.7975
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:10<00:00,  5.86it/s, loss=0.442]


Epoch 3 Loss: 13.4921
Test Metrics: Precision=0.9115, Recall=0.9115, F1=0.9115

Fine-tuning bert-large-cased (medium) with Train Size 500, Split 10...


Map: 100%|██████████| 500/500 [00:00<00:00, 8378.86 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7479.54 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9407.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-large-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:32<00:00,  1.96it/s, loss=0.412]


Epoch 1 Loss: 33.5327
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:32<00:00,  1.95it/s, loss=0.158]


Epoch 2 Loss: 17.3608
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:32<00:00,  1.92it/s, loss=0.314] 


Epoch 3 Loss: 12.8980
Test Metrics: Precision=0.9140, Recall=0.9140, F1=0.9140

Fine-tuning roberta-large (large) with Train Size 500, Split 10...


Map: 100%|██████████| 500/500 [00:00<00:00, 8731.91 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7726.59 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9612.85 examples/s]
Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:30<00:00,  2.10it/s, loss=0.257]


Epoch 1 Loss: 28.7331
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:30<00:00,  2.05it/s, loss=0.271]


Epoch 2 Loss: 13.5108
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:30<00:00,  2.07it/s, loss=0.122] 


Epoch 3 Loss: 8.9374
Test Metrics: Precision=0.9363, Recall=0.9363, F1=0.9363
Results saved to Experiments_moreksplits10_lesssteps20_for_smoother_graphh.xlsx


### Lipitor Analysis

In [19]:
# Load the combined Lipitor annotations and keep only the 'full_data' split.
lipitor_dataset_path = "output_datasets/LIPITOR_combined_output.txt"
dataset_dict_lip = create_dataset_from_final_file(lipitor_dataset_path)
dataset_lip = dataset_dict_lip['full_data']

# Tag names as declared on the "ner_tags" feature; list position is the class id.
label_names_lip = dataset_lip.features["ner_tags"].feature.names

# Forward (id -> tag name) and reverse (tag name -> id) lookup tables.
id2label = dict(enumerate(label_names_lip))
label2id = {tag_name: tag_id for tag_id, tag_name in id2label.items()}

Casting the dataset: 100%|██████████| 999/999 [00:00<00:00, 22538.98 examples/s]


In [20]:
def _align_labels_with_tokens(labels_, word_ids):
    """
    Expand word-level tag ids into one label per tokenizer output position.

    Parameters:
    - labels_ (list[int]): Tag id for each word of the example.
    - word_ids (list[int | None]): For every token, the index of the word it
      was produced from, or None for special tokens.

    Returns:
    - list[int]: One label per token. Special tokens get -100 (the value the
      loss ignores by default).
    """
    new_labels = []
    current_word = None

    for word_id in word_ids:
        if word_id is None:
            # Special token; resetting current_word makes the next real token
            # count as the start of a word.
            current_word = None
            new_labels.append(-100)
        elif word_id != current_word:
            # First sub-token of a new word keeps the word's own label
            current_word = word_id
            new_labels.append(labels_[word_id])
        else:
            # Same word as previous token
            label = labels_[word_id]

            # If the label is B-XXX we change it to I-XXX
            # NOTE(review): assumes odd ids are B- tags whose I- tag is id + 1
            # -- confirm against the label list of the dataset.
            if label % 2 == 1:
                label += 1
            new_labels.append(label)

    return new_labels


def _predict_token_labels(model, dataloader, accelerator):
    """
    Run `model` over `dataloader` without gradients and collect the outputs.

    Returns:
    - tuple: (labels, predictions) accumulated over all batches, in the order
      returned by the notebook's `postprocess` helper.
    """
    collected_labels, collected_predictions = [], []
    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(accelerator.device) for k, v in batch.items()}
            logits = model(**batch).logits

            predictions = accelerator.gather(logits.argmax(dim=-1))
            labels = accelerator.gather(batch["labels"])

            flat_labels, flat_predictions = postprocess(predictions, labels)
            collected_labels.extend(flat_labels)
            collected_predictions.extend(flat_predictions)

    return collected_labels, collected_predictions


def iterate_and_finetune_with_torch_lip(
    dataset,
    file_name,
    models,
    start_size=5,
    end_size=500,
    step_size=5,
    k_splits=5,
    batch_size=8,
    learning_rate=5e-5,
    weight_decay=0.0,
    num_epochs=3,
    id2label=None,
    label2id=None,
):
    """
    Fine-tune models with varying dataset sizes and k-fold splits, saving results to Excel.

    For every (train size, split, model) combination a fresh model is loaded,
    trained for `num_epochs`, evaluated on the validation and test subsets, and
    one row with the test F1 is appended to `file_name`. The file is rewritten
    after every run so an interrupted sweep keeps its finished rows; if the
    file already exists, new rows are appended to its content.

    Parameters:
    - dataset (Dataset): Data with "tokens" and "ner_tags" columns from which the
      train/validation/test subsets are drawn.
    - file_name (str): Excel file to save results.
    - models (dict): Maps a size label to a model checkpoint name.
    - start_size (int): Starting size for training datasets.
    - end_size (int): Maximum size for training datasets.
    - step_size (int): Step size for increasing dataset sizes.
    - k_splits (int): Number of k-fold splits.
    - batch_size (int): Batch size for training and evaluation.
    - learning_rate (float): Learning rate for fine-tuning.
    - weight_decay (float): Weight decay for optimizer.
    - num_epochs (int): Number of training epochs.
    - id2label (dict | None): Label id -> tag name. When None it is built from
      the class names of the dataset's "ner_tags" feature.
    - label2id (dict | None): Tag name -> label id. When None it is the inverse
      of `id2label`.

    NOTE(review): the logged runs show Precision == Recall == F1 for every run,
    which is what micro-averaging over one-label-per-token predictions yields;
    confirm that this token-level score is the intended metric.
    """
    # Take the label maps from the data being trained on instead of relying on
    # whatever notebook-level id2label / label2id happen to be defined.
    if id2label is None:
        id2label = dict(enumerate(dataset.features["ner_tags"].feature.names))
    if label2id is None:
        label2id = {label: idx for idx, label in id2label.items()}

    # Check or create the results file
    if os.path.exists(file_name):
        results_df = pd.read_excel(file_name)
    else:
        results_df = pd.DataFrame(columns=["Train Size", "K-Fold", "Test F1", "Model"])

    for train_size in range(start_size, end_size + 1, step_size):
        for split in range(k_splits):
            for size, model_name in models.items():
                print(f"\nFine-tuning {model_name} ({size}) with Train Size {train_size}, Split {split + 1}...")

                # Initialize tokenizer
                # NOTE(review): the RoBERTa tokenizer is selected by the "large"
                # key rather than by model_name -- confirm this matches every
                # `models` mapping passed in.
                if size == "large":
                    tokenizer = RobertaTokenizerFast.from_pretrained("roberta-large", add_prefix_space=True)
                else:
                    tokenizer = AutoTokenizer.from_pretrained(model_name)

                data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

                # Generate datasets
                # NOTE(review): subsets are regenerated for every model; if the
                # generators are not seeded, models see different samples.
                train_datasets = generate_train_datasets(
                    dataset, number_of_samples=train_size, number_of_splits=k_splits
                )
                _, train_dataset, train_indices = train_datasets[split]

                val_datasets = generate_validation_datasets(
                    dataset, train_indices=train_indices, number_of_samples=train_size, number_of_splits=k_splits
                )
                _, val_dataset, val_indices = val_datasets[split]

                test_datasets = generate_test_datasets(
                    dataset, train_indices=train_indices, val_indices=val_indices,
                    number_of_samples=train_size, number_of_splits=k_splits
                )
                _, test_dataset = test_datasets[split]

                def tokenize_and_align_labels(examples):
                    # Tokenize pre-split words and attach token-level labels
                    tokenized_inputs = tokenizer(
                        examples["tokens"], truncation=True,
                        is_split_into_words=True
                    )
                    tokenized_inputs["labels"] = [
                        _align_labels_with_tokens(labels_, tokenized_inputs.word_ids(i))
                        for i, labels_ in enumerate(examples["ner_tags"])
                    ]
                    return tokenized_inputs

                # Tokenize datasets
                tokenized_train = train_dataset.map(tokenize_and_align_labels, batched=True,  remove_columns=dataset.column_names)
                tokenized_val = val_dataset.map(tokenize_and_align_labels, batched=True,  remove_columns=dataset.column_names)
                tokenized_test = test_dataset.map(tokenize_and_align_labels, batched=True,  remove_columns=dataset.column_names)

                train_dataloader = DataLoader(tokenized_train, batch_size=batch_size, shuffle=True, collate_fn=data_collator)
                val_dataloader = DataLoader(tokenized_val, batch_size=batch_size, collate_fn=data_collator)
                test_dataloader = DataLoader(tokenized_test, batch_size=batch_size, collate_fn=data_collator)

                # Initialize the model for token classification
                model = AutoModelForTokenClassification.from_pretrained(
                    model_name, id2label=id2label, label2id=label2id
                )

                #Optimize GPU ram at cost of some speed
                model.gradient_checkpointing_enable()

                optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
                num_training_steps = num_epochs * len(train_dataloader)
                lr_scheduler = get_scheduler(
                    "linear",
                    optimizer=optimizer,
                    num_warmup_steps=0,
                    num_training_steps=num_training_steps
                )

                accelerator = Accelerator()
                model, optimizer, train_dataloader, val_dataloader = accelerator.prepare(
                    model, optimizer, train_dataloader, val_dataloader
                )
                # Use the device Accelerate placed the model on, so batches from
                # the unprepared test loader always land next to the model.
                device = accelerator.device

                # Bound up front so the cleanup below is safe even if the
                # training loader yields no batches.
                loss = outputs = None

                # Training loop
                for epoch in range(num_epochs):
                    print(f"Epoch {epoch + 1}/{num_epochs}")
                    model.train()
                    total_loss = 0  # sum of per-batch losses, not an average
                    progress_bar = tqdm(train_dataloader, desc=f"Training Epoch {epoch+1}")
                    for batch in progress_bar:
                        batch = {k: v.to(device) for k, v in batch.items()}
                        outputs = model(**batch)
                        loss = outputs.loss
                        total_loss += loss.item()

                        accelerator.backward(loss)
                        optimizer.step()
                        lr_scheduler.step()
                        optimizer.zero_grad()
                        progress_bar.set_postfix(loss=loss.item())

                    print(f"Epoch {epoch + 1} Loss: {total_loss:.4f}")

                # Validation pass: report the score instead of discarding it
                val_labels, val_predictions = _predict_token_labels(model, val_dataloader, accelerator)
                if val_labels:
                    val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(
                        val_labels, val_predictions, average="micro"
                    )
                    print(
                        f"Validation Metrics: Precision={val_precision:.4f}, "
                        f"Recall={val_recall:.4f}, F1={val_f1:.4f}"
                    )

                # Test pass
                test_labels, test_predictions = _predict_token_labels(model, test_dataloader, accelerator)

                # Calculate test metrics
                precision, recall, f1, _ = precision_recall_fscore_support(
                    test_labels, test_predictions, average="micro"
                )
                print(f"Test Metrics: Precision={precision:.4f}, Recall={recall:.4f}, F1={f1:.4f}")

                # Append results; concatenating onto an empty frame is
                # deprecated in pandas and raised a FutureWarning here.
                new_row = pd.DataFrame(
                    [{"Train Size": train_size, "K-Fold": split + 1, "Test F1": f1, "Model": model_name}]
                )
                if results_df.empty:
                    results_df = new_row
                else:
                    results_df = pd.concat([results_df, new_row], ignore_index=True)
                results_df.to_excel(file_name, index=False)

                # Cleanup: the Accelerator and the scheduler both hold
                # references to the model/optimizer, so they must be released
                # too or the GPU memory stays allocated during the next load.
                accelerator.free_memory()
                del model, optimizer, lr_scheduler, accelerator, loss, outputs
                del train_dataloader, val_dataloader, test_dataloader
                del tokenized_train, tokenized_val, tokenized_test, tokenizer, data_collator
                del train_datasets, val_datasets, test_datasets
                del train_dataset, val_dataset, test_dataset
                torch.cuda.empty_cache()

    print(f"Results saved to {file_name}")


In [None]:
# Checkpoints to compare on the Lipitor data, keyed by a size label
models = dict(
    small="bert-base-cased",
    medium="bert-large-cased",
    large="roberta-large",
)

# Sweep training sizes 180..500 in steps of 5; rows are written to the
# Excel file after every run.
iterate_and_finetune_with_torch_lip(
    dataset=dataset_lip,
    file_name='Experiments_lipitor.xlsx',
    models=models,
    start_size=180,
    end_size=500,
    step_size=5,
)


Fine-tuning bert-base-cased (small) with Train Size 180, Split 1...


Map: 100%|██████████| 180/180 [00:00<00:00, 6548.48 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5542.32 examples/s]
Map: 100%|██████████| 783/783 [00:00<00:00, 7828.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.24it/s, loss=0.517]


Epoch 1 Loss: 19.6014
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 10.80it/s, loss=0.302]


Epoch 2 Loss: 8.7552
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 11.24it/s, loss=0.202]


Epoch 3 Loss: 6.9147
Test Metrics: Precision=0.8880, Recall=0.8880, F1=0.8880

Fine-tuning bert-large-cased (medium) with Train Size 180, Split 1...


Map: 100%|██████████| 180/180 [00:00<00:00, 6734.23 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5889.73 examples/s]
Map: 100%|██████████| 783/783 [00:00<00:00, 8583.88 examples/s]


## State-of-the-art model analysis

In [15]:
# Single checkpoint for this sweep: BioBERT, stored under the key "state".
# NOTE(review): this cell carries execution count 15 although it sits below
# cells 19/20 -- confirm that `dataset` and the label maps it relies on are
# the intended ones when the notebook is run top to bottom.
models = dict(state="dmis-lab/biobert-v1.1")

iterate_and_finetune_with_torch(
    dataset=dataset,
    file_name='Experiments_full_labeled_biobert.xlsx',
    models=models,
    start_size=5,
    end_size=500,
    step_size=5,
)


Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 5, Split 1...


Map: 100%|██████████| 5/5 [00:00<00:00, 2060.07 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 479.79 examples/s]
Map: 100%|██████████| 1242/1242 [00:00<00:00, 8458.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 22.97it/s, loss=2.79]


Epoch 1 Loss: 2.7923
Epoch 2/3


Training Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 24.10it/s, loss=2.19]


Epoch 2 Loss: 2.1950
Epoch 3/3


Training Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 23.97it/s, loss=1.81]

Epoch 3 Loss: 1.8083



  results_df = pd.concat([results_df, new_row], ignore_index=True)


Test Metrics: Precision=0.7172, Recall=0.7172, F1=0.7172

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 5, Split 2...


Map: 100%|██████████| 5/5 [00:00<00:00, 2007.23 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 611.15 examples/s]
Map: 100%|██████████| 1242/1242 [00:00<00:00, 9441.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 21.01it/s, loss=2.41]


Epoch 1 Loss: 2.4072
Epoch 2/3


Training Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 23.29it/s, loss=1.79]


Epoch 2 Loss: 1.7889
Epoch 3/3


Training Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 23.17it/s, loss=1.42]

Epoch 3 Loss: 1.4155





Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 5, Split 3...


Map: 100%|██████████| 5/5 [00:00<00:00, 1837.99 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 530.39 examples/s]
Map: 100%|██████████| 1242/1242 [00:00<00:00, 9404.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 15.23it/s, loss=2.4]


Epoch 1 Loss: 2.4040
Epoch 2/3


Training Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 16.44it/s, loss=1.68]


Epoch 2 Loss: 1.6776
Epoch 3/3


Training Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 16.46it/s, loss=1.28]

Epoch 3 Loss: 1.2808





Test Metrics: Precision=0.7758, Recall=0.7758, F1=0.7758

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 5, Split 4...


Map: 100%|██████████| 5/5 [00:00<00:00, 1582.04 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 787.37 examples/s]
Map: 100%|██████████| 1242/1242 [00:00<00:00, 9279.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 12.11it/s, loss=2.35]


Epoch 1 Loss: 2.3464
Epoch 2/3


Training Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 12.76it/s, loss=1.7]


Epoch 2 Loss: 1.7046
Epoch 3/3


Training Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 12.83it/s, loss=1.37]


Epoch 3 Loss: 1.3707
Test Metrics: Precision=0.7768, Recall=0.7768, F1=0.7768

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 5, Split 5...


Map: 100%|██████████| 5/5 [00:00<00:00, 1925.58 examples/s]
Map: 100%|██████████| 1/1 [00:00<00:00, 573.78 examples/s]
Map: 100%|██████████| 1242/1242 [00:00<00:00, 9271.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 1/1 [00:00<00:00, 14.39it/s, loss=2.47]


Epoch 1 Loss: 2.4696
Epoch 2/3


Training Epoch 2: 100%|██████████| 1/1 [00:00<00:00, 15.37it/s, loss=1.88]


Epoch 2 Loss: 1.8779
Epoch 3/3


Training Epoch 3: 100%|██████████| 1/1 [00:00<00:00, 15.49it/s, loss=1.54]


Epoch 3 Loss: 1.5417
Test Metrics: Precision=0.7763, Recall=0.7763, F1=0.7763

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 10, Split 1...


Map: 100%|██████████| 10/10 [00:00<00:00, 3335.70 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 1233.98 examples/s]
Map: 100%|██████████| 1236/1236 [00:00<00:00, 9102.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 14.42it/s, loss=2.08]


Epoch 1 Loss: 4.5827
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 14.49it/s, loss=1.48]


Epoch 2 Loss: 2.9423
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 15.11it/s, loss=1.12]


Epoch 3 Loss: 2.1320
Test Metrics: Precision=0.7769, Recall=0.7769, F1=0.7769

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 10, Split 2...


Map: 100%|██████████| 10/10 [00:00<00:00, 2037.65 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 1250.72 examples/s]
Map: 100%|██████████| 1236/1236 [00:00<00:00, 8762.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  9.22it/s, loss=2.09]


Epoch 1 Loss: 4.6227
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 17.09it/s, loss=1.04]


Epoch 2 Loss: 2.6941
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 10.15it/s, loss=1.15]


Epoch 3 Loss: 2.2238
Test Metrics: Precision=0.7772, Recall=0.7772, F1=0.7772

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 10, Split 3...


Map: 100%|██████████| 10/10 [00:00<00:00, 2853.27 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 1186.34 examples/s]
Map: 100%|██████████| 1236/1236 [00:00<00:00, 9357.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 14.46it/s, loss=1.74]


Epoch 1 Loss: 4.2645
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 15.93it/s, loss=1.16]


Epoch 2 Loss: 2.4846
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 16.05it/s, loss=1.57]


Epoch 3 Loss: 2.4887
Test Metrics: Precision=0.7772, Recall=0.7772, F1=0.7772

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 10, Split 4...


Map: 100%|██████████| 10/10 [00:00<00:00, 2467.53 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 1144.58 examples/s]
Map: 100%|██████████| 1236/1236 [00:00<00:00, 9175.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 14.37it/s, loss=1.88]


Epoch 1 Loss: 4.3329
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 13.76it/s, loss=1.23]


Epoch 2 Loss: 2.6946
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 15.01it/s, loss=0.825]


Epoch 3 Loss: 1.8515
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 10, Split 5...


Map: 100%|██████████| 10/10 [00:00<00:00, 3208.62 examples/s]
Map: 100%|██████████| 2/2 [00:00<00:00, 1028.90 examples/s]
Map: 100%|██████████| 1236/1236 [00:00<00:00, 9260.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 15.68it/s, loss=1.95]


Epoch 1 Loss: 4.3262
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 16.45it/s, loss=1.29]


Epoch 2 Loss: 2.8863
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 16.56it/s, loss=1.12]


Epoch 3 Loss: 2.3919
Test Metrics: Precision=0.7778, Recall=0.7778, F1=0.7778

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 15, Split 1...


Map: 100%|██████████| 15/15 [00:00<00:00, 3764.41 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 1744.72 examples/s]
Map: 100%|██████████| 1230/1230 [00:00<00:00, 3570.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 12.51it/s, loss=1.98]


Epoch 1 Loss: 4.4735
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 11.70it/s, loss=1.34]


Epoch 2 Loss: 2.7362
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 13.21it/s, loss=1.11] 


Epoch 3 Loss: 1.9874
Test Metrics: Precision=0.7769, Recall=0.7769, F1=0.7769

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 15, Split 2...


Map: 100%|██████████| 15/15 [00:00<00:00, 2945.44 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 1225.21 examples/s]
Map: 100%|██████████| 1230/1230 [00:00<00:00, 9274.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00,  8.87it/s, loss=1.64]


Epoch 1 Loss: 3.6781
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00,  8.50it/s, loss=1.09]


Epoch 2 Loss: 2.2791
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00,  9.12it/s, loss=1.12] 


Epoch 3 Loss: 1.9764
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 15, Split 3...


Map: 100%|██████████| 15/15 [00:00<00:00, 3523.64 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 1809.71 examples/s]
Map: 100%|██████████| 1230/1230 [00:00<00:00, 9221.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 12.96it/s, loss=1.88]


Epoch 1 Loss: 4.3361
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 11.76it/s, loss=1.03]


Epoch 2 Loss: 2.5491
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 14.21it/s, loss=0.806]


Epoch 3 Loss: 2.0008
Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 15, Split 4...


Map: 100%|██████████| 15/15 [00:00<00:00, 3732.03 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 1308.27 examples/s]
Map: 100%|██████████| 1230/1230 [00:00<00:00, 9245.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 12.66it/s, loss=1.92]


Epoch 1 Loss: 4.2325
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 11.26it/s, loss=1.07]


Epoch 2 Loss: 2.5412
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 10.55it/s, loss=0.82]


Epoch 3 Loss: 1.8841
Test Metrics: Precision=0.7769, Recall=0.7769, F1=0.7769

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 15, Split 5...


Map: 100%|██████████| 15/15 [00:00<00:00, 4148.67 examples/s]
Map: 100%|██████████| 3/3 [00:00<00:00, 1808.15 examples/s]
Map: 100%|██████████| 1230/1230 [00:00<00:00, 8943.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 2/2 [00:00<00:00, 12.62it/s, loss=2.18]


Epoch 1 Loss: 4.7996
Epoch 2/3


Training Epoch 2: 100%|██████████| 2/2 [00:00<00:00, 13.65it/s, loss=1.62]


Epoch 2 Loss: 3.2521
Epoch 3/3


Training Epoch 3: 100%|██████████| 2/2 [00:00<00:00, 13.76it/s, loss=1.21]


Epoch 3 Loss: 2.4667
Test Metrics: Precision=0.7779, Recall=0.7779, F1=0.7779

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 1...


Map: 100%|██████████| 20/20 [00:00<00:00, 4687.42 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1956.30 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 8960.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 12.88it/s, loss=1.48]


Epoch 1 Loss: 5.8401
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 14.66it/s, loss=1.06]


Epoch 2 Loss: 3.0616
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 14.99it/s, loss=0.456]


Epoch 3 Loss: 2.3703
Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 2...


Map: 100%|██████████| 20/20 [00:00<00:00, 3173.54 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1931.75 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 8731.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 10.51it/s, loss=1.68]


Epoch 1 Loss: 6.3689
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 11.37it/s, loss=0.71] 


Epoch 2 Loss: 2.8648
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 10.07it/s, loss=1.05] 


Epoch 3 Loss: 2.6823
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 3...


Map: 100%|██████████| 20/20 [00:00<00:00, 4608.37 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1739.47 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9041.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 13.55it/s, loss=1.18]


Epoch 1 Loss: 5.0654
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 13.73it/s, loss=0.64]


Epoch 2 Loss: 2.7370
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 14.75it/s, loss=1.19] 


Epoch 3 Loss: 2.9568
Test Metrics: Precision=0.7768, Recall=0.7768, F1=0.7768

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 4...


Map: 100%|██████████| 20/20 [00:00<00:00, 4572.94 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1614.75 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9032.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 12.22it/s, loss=1.36]


Epoch 1 Loss: 5.4946
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 12.97it/s, loss=0.725]


Epoch 2 Loss: 2.6343
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 11.93it/s, loss=0.737]


Epoch 3 Loss: 2.4273
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 5...


Map: 100%|██████████| 20/20 [00:00<00:00, 4705.56 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 2245.04 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9015.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 14.40it/s, loss=1.61]


Epoch 1 Loss: 5.9879
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 13.36it/s, loss=0.808]


Epoch 2 Loss: 3.3313
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 13.22it/s, loss=0.72]


Epoch 3 Loss: 3.1198
Test Metrics: Precision=0.7784, Recall=0.7784, F1=0.7784

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 25, Split 1...


Map: 100%|██████████| 25/25 [00:00<00:00, 5403.92 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 2446.80 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 8944.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 14.48it/s, loss=1.49]


Epoch 1 Loss: 6.9483
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 14.02it/s, loss=0.603]


Epoch 2 Loss: 3.2993
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 15.30it/s, loss=0.859]


Epoch 3 Loss: 3.3499
Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 25, Split 2...


Map: 100%|██████████| 25/25 [00:00<00:00, 3725.49 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 2346.33 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 8973.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 11.07it/s, loss=0.921]


Epoch 1 Loss: 7.1160
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 11.18it/s, loss=0.79] 


Epoch 2 Loss: 3.7012
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00,  8.82it/s, loss=0.779]


Epoch 3 Loss: 3.4935
Test Metrics: Precision=0.7774, Recall=0.7774, F1=0.7774

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 25, Split 3...


Map: 100%|██████████| 25/25 [00:00<00:00, 5572.49 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 2155.35 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 9363.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 17.18it/s, loss=1.83]


Epoch 1 Loss: 8.0405
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 14.42it/s, loss=1.17] 


Epoch 2 Loss: 4.0718
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 14.37it/s, loss=0.525]


Epoch 3 Loss: 3.1931
Test Metrics: Precision=0.7773, Recall=0.7773, F1=0.7773

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 25, Split 4...


Map: 100%|██████████| 25/25 [00:00<00:00, 4153.27 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 2243.66 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 9479.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 12.29it/s, loss=1.61]


Epoch 1 Loss: 6.8514
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 13.29it/s, loss=0.854]


Epoch 2 Loss: 3.2686
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 12.67it/s, loss=0.315]


Epoch 3 Loss: 2.5522
Test Metrics: Precision=0.7765, Recall=0.7765, F1=0.7765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 25, Split 5...


Map: 100%|██████████| 25/25 [00:00<00:00, 5317.59 examples/s]
Map: 100%|██████████| 5/5 [00:00<00:00, 1697.00 examples/s]
Map: 100%|██████████| 1218/1218 [00:00<00:00, 9311.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 15.90it/s, loss=1.58]


Epoch 1 Loss: 7.1875
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 15.01it/s, loss=2.26] 


Epoch 2 Loss: 5.2873
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 13.86it/s, loss=0.711]


Epoch 3 Loss: 3.5063
Test Metrics: Precision=0.7776, Recall=0.7776, F1=0.7776

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 30, Split 1...


Map: 100%|██████████| 30/30 [00:00<00:00, 6005.88 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 2111.05 examples/s]
Map: 100%|██████████| 1212/1212 [00:00<00:00, 9392.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 13.84it/s, loss=1.35]


Epoch 1 Loss: 7.6119
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 13.55it/s, loss=0.989]


Epoch 2 Loss: 3.7754
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 16.11it/s, loss=0.649]


Epoch 3 Loss: 3.4143
Test Metrics: Precision=0.7769, Recall=0.7769, F1=0.7769

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 30, Split 2...


Map: 100%|██████████| 30/30 [00:00<00:00, 4269.88 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 2429.60 examples/s]
Map: 100%|██████████| 1212/1212 [00:00<00:00, 9319.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00,  9.49it/s, loss=1.04]


Epoch 1 Loss: 6.8139
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 11.17it/s, loss=0.954]


Epoch 2 Loss: 4.1118
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00,  9.12it/s, loss=0.946]


Epoch 3 Loss: 3.7781
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 30, Split 3...


Map: 100%|██████████| 30/30 [00:00<00:00, 5724.45 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 2788.15 examples/s]
Map: 100%|██████████| 1212/1212 [00:00<00:00, 9351.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 11.95it/s, loss=1.29]


Epoch 1 Loss: 7.1909
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 13.53it/s, loss=0.92] 


Epoch 2 Loss: 3.6261
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 16.25it/s, loss=0.761]


Epoch 3 Loss: 3.2558
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 30, Split 4...


Map: 100%|██████████| 30/30 [00:00<00:00, 5627.42 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 2146.52 examples/s]
Map: 100%|██████████| 1212/1212 [00:00<00:00, 9456.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 11.77it/s, loss=1.41]


Epoch 1 Loss: 7.5478
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 13.53it/s, loss=0.885]


Epoch 2 Loss: 3.5834
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 11.25it/s, loss=0.514]


Epoch 3 Loss: 3.1504
Test Metrics: Precision=0.7766, Recall=0.7766, F1=0.7766

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 30, Split 5...


Map: 100%|██████████| 30/30 [00:00<00:00, 5893.91 examples/s]
Map: 100%|██████████| 6/6 [00:00<00:00, 2201.16 examples/s]
Map: 100%|██████████| 1212/1212 [00:00<00:00, 9101.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 13.44it/s, loss=1.39]


Epoch 1 Loss: 7.4638
Epoch 2/3


Training Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 12.41it/s, loss=1.43]


Epoch 2 Loss: 4.3078
Epoch 3/3


Training Epoch 3: 100%|██████████| 4/4 [00:00<00:00, 11.84it/s, loss=1]    


Epoch 3 Loss: 3.7360
Test Metrics: Precision=0.7776, Recall=0.7776, F1=0.7776

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 35, Split 1...


Map: 100%|██████████| 35/35 [00:00<00:00, 6444.28 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 2768.52 examples/s]
Map: 100%|██████████| 1206/1206 [00:00<00:00, 9172.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 15.60it/s, loss=0.806]


Epoch 1 Loss: 7.2762
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 14.85it/s, loss=0.801]


Epoch 2 Loss: 4.5039
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 15.48it/s, loss=1.02] 


Epoch 3 Loss: 4.0503
Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 35, Split 2...


Map: 100%|██████████| 35/35 [00:00<00:00, 4731.54 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 2919.08 examples/s]
Map: 100%|██████████| 1206/1206 [00:00<00:00, 9234.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00,  9.86it/s, loss=0.944]


Epoch 1 Loss: 7.9971
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00,  9.73it/s, loss=0.943]


Epoch 2 Loss: 5.2030
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.82it/s, loss=0.59] 


Epoch 3 Loss: 4.2626
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 35, Split 3...


Map: 100%|██████████| 35/35 [00:00<00:00, 5715.87 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 2551.06 examples/s]
Map: 100%|██████████| 1206/1206 [00:00<00:00, 8926.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 13.13it/s, loss=1.53]


Epoch 1 Loss: 8.2924
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 14.60it/s, loss=0.634]


Epoch 2 Loss: 4.4932
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 13.99it/s, loss=0.564]


Epoch 3 Loss: 3.7849
Test Metrics: Precision=0.7772, Recall=0.7772, F1=0.7772

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 35, Split 4...


Map: 100%|██████████| 35/35 [00:00<00:00, 5674.77 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 2889.78 examples/s]
Map: 100%|██████████| 1206/1206 [00:00<00:00, 8959.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 12.56it/s, loss=0.877]


Epoch 1 Loss: 7.8567
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 13.26it/s, loss=0.445]


Epoch 2 Loss: 4.1625
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 12.58it/s, loss=0.52] 


Epoch 3 Loss: 3.7200
Test Metrics: Precision=0.7758, Recall=0.7758, F1=0.7758

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 35, Split 5...


Map: 100%|██████████| 35/35 [00:00<00:00, 6287.23 examples/s]
Map: 100%|██████████| 7/7 [00:00<00:00, 2974.08 examples/s]
Map: 100%|██████████| 1206/1206 [00:00<00:00, 8971.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 14.52it/s, loss=0.63]


Epoch 1 Loss: 7.6899
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 14.35it/s, loss=0.587]


Epoch 2 Loss: 4.8789
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 13.02it/s, loss=1.23] 


Epoch 3 Loss: 4.5123
Test Metrics: Precision=0.7778, Recall=0.7778, F1=0.7778

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 1...


Map: 100%|██████████| 40/40 [00:00<00:00, 6527.34 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2288.69 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9103.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 14.21it/s, loss=0.854]


Epoch 1 Loss: 8.4840
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 13.29it/s, loss=0.831]


Epoch 2 Loss: 4.8945
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 13.76it/s, loss=0.898]


Epoch 3 Loss: 4.2736
Test Metrics: Precision=0.7765, Recall=0.7765, F1=0.7765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 2...


Map: 100%|██████████| 40/40 [00:00<00:00, 5243.86 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2941.31 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9121.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00,  9.44it/s, loss=1.05]


Epoch 1 Loss: 8.2433
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00,  9.30it/s, loss=1.1]  


Epoch 2 Loss: 5.0083
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.73it/s, loss=0.828]


Epoch 3 Loss: 4.4654
Test Metrics: Precision=0.7781, Recall=0.7781, F1=0.7781

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 3...


Map: 100%|██████████| 40/40 [00:00<00:00, 6690.81 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2104.12 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9056.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 12.94it/s, loss=1.3] 


Epoch 1 Loss: 8.2562
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 13.68it/s, loss=0.69] 


Epoch 2 Loss: 4.5969
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 15.07it/s, loss=0.694]


Epoch 3 Loss: 4.1026
Test Metrics: Precision=0.7767, Recall=0.7767, F1=0.7767

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 4...


Map: 100%|██████████| 40/40 [00:00<00:00, 5483.83 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 3806.95 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9002.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00,  9.30it/s, loss=0.936]


Epoch 1 Loss: 8.0084
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00,  9.48it/s, loss=0.846]


Epoch 2 Loss: 4.3422
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.59it/s, loss=0.672]


Epoch 3 Loss: 3.7020
Test Metrics: Precision=0.7765, Recall=0.7765, F1=0.7765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 5...


Map: 100%|██████████| 40/40 [00:00<00:00, 6109.03 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2013.35 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9171.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 11.60it/s, loss=0.954]


Epoch 1 Loss: 7.7181
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 11.31it/s, loss=0.969]


Epoch 2 Loss: 4.9181
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 11.01it/s, loss=0.89] 


Epoch 3 Loss: 4.1886
Test Metrics: Precision=0.7777, Recall=0.7777, F1=0.7777

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 45, Split 1...


Map: 100%|██████████| 45/45 [00:00<00:00, 7356.42 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 3701.22 examples/s]
Map: 100%|██████████| 1194/1194 [00:00<00:00, 3689.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 6/6 [00:00<00:00, 13.85it/s, loss=1.02] 


Epoch 1 Loss: 7.4641
Epoch 2/3


Training Epoch 2: 100%|██████████| 6/6 [00:00<00:00, 14.14it/s, loss=0.423]


Epoch 2 Loss: 4.5698
Epoch 3/3


Training Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 13.89it/s, loss=0.662]


Epoch 3 Loss: 3.9221
Test Metrics: Precision=0.7812, Recall=0.7812, F1=0.7812

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 45, Split 2...


Map: 100%|██████████| 45/45 [00:00<00:00, 6016.50 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 2361.95 examples/s]
Map: 100%|██████████| 1194/1194 [00:00<00:00, 9070.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 6/6 [00:00<00:00, 11.69it/s, loss=1.8]  


Epoch 1 Loss: 8.9401
Epoch 2/3


Training Epoch 2: 100%|██████████| 6/6 [00:00<00:00, 10.80it/s, loss=1.2]  


Epoch 2 Loss: 5.4913
Epoch 3/3


Training Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 10.62it/s, loss=0.908]


Epoch 3 Loss: 4.7346
Test Metrics: Precision=0.7805, Recall=0.7805, F1=0.7805

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 45, Split 3...


Map: 100%|██████████| 45/45 [00:00<00:00, 6811.39 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 3530.56 examples/s]
Map: 100%|██████████| 1194/1194 [00:00<00:00, 8661.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 6/6 [00:00<00:00, 13.44it/s, loss=1.05]


Epoch 1 Loss: 9.9669
Epoch 2/3


Training Epoch 2: 100%|██████████| 6/6 [00:00<00:00, 13.65it/s, loss=0.551]


Epoch 2 Loss: 5.2525
Epoch 3/3


Training Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 14.23it/s, loss=0.586]


Epoch 3 Loss: 4.5987
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 45, Split 4...


Map: 100%|██████████| 45/45 [00:00<00:00, 5820.75 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 3302.31 examples/s]
Map: 100%|██████████| 1194/1194 [00:00<00:00, 8849.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 6/6 [00:00<00:00, 10.11it/s, loss=0.878]


Epoch 1 Loss: 8.1329
Epoch 2/3


Training Epoch 2: 100%|██████████| 6/6 [00:00<00:00, 10.39it/s, loss=0.987]


Epoch 2 Loss: 5.1710
Epoch 3/3


Training Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 10.75it/s, loss=0.975]


Epoch 3 Loss: 4.5428
Test Metrics: Precision=0.7764, Recall=0.7764, F1=0.7764

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 45, Split 5...


Map: 100%|██████████| 45/45 [00:00<00:00, 6286.43 examples/s]
Map: 100%|██████████| 9/9 [00:00<00:00, 3317.40 examples/s]
Map: 100%|██████████| 1194/1194 [00:00<00:00, 8695.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 6/6 [00:00<00:00, 11.88it/s, loss=0.955]


Epoch 1 Loss: 9.1026
Epoch 2/3


Training Epoch 2: 100%|██████████| 6/6 [00:00<00:00, 11.58it/s, loss=0.947]


Epoch 2 Loss: 5.9587
Epoch 3/3


Training Epoch 3: 100%|██████████| 6/6 [00:00<00:00, 11.09it/s, loss=0.859]


Epoch 3 Loss: 5.1299
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 50, Split 1...


Map: 100%|██████████| 50/50 [00:00<00:00, 6848.29 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 2984.00 examples/s]
Map: 100%|██████████| 1188/1188 [00:00<00:00, 8637.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 14.42it/s, loss=0.763]


Epoch 1 Loss: 9.6974
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 14.36it/s, loss=0.849]


Epoch 2 Loss: 6.3233
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 14.18it/s, loss=0.865]


Epoch 3 Loss: 5.5099
Test Metrics: Precision=0.7773, Recall=0.7773, F1=0.7773

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 50, Split 2...


Map: 100%|██████████| 50/50 [00:00<00:00, 6055.18 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 3933.88 examples/s]
Map: 100%|██████████| 1188/1188 [00:00<00:00, 9034.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 12.70it/s, loss=0.631]


Epoch 1 Loss: 9.1872
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 11.24it/s, loss=1.24] 


Epoch 2 Loss: 6.4141
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 10.86it/s, loss=0.729]


Epoch 3 Loss: 5.2614
Test Metrics: Precision=0.7795, Recall=0.7795, F1=0.7795

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 50, Split 3...


Map: 100%|██████████| 50/50 [00:00<00:00, 7105.14 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 3410.28 examples/s]
Map: 100%|██████████| 1188/1188 [00:00<00:00, 9115.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 14.75it/s, loss=0.866]


Epoch 1 Loss: 9.8166
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 14.81it/s, loss=0.877]


Epoch 2 Loss: 5.6707
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 15.20it/s, loss=1.13] 


Epoch 3 Loss: 5.0442
Test Metrics: Precision=0.7987, Recall=0.7987, F1=0.7987

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 50, Split 4...


Map: 100%|██████████| 50/50 [00:00<00:00, 6135.07 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 2421.65 examples/s]
Map: 100%|██████████| 1188/1188 [00:00<00:00, 9131.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 10.65it/s, loss=1.02] 


Epoch 1 Loss: 9.9967
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 10.35it/s, loss=0.673]


Epoch 2 Loss: 6.0139
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 10.26it/s, loss=0.699]


Epoch 3 Loss: 5.2274
Test Metrics: Precision=0.7764, Recall=0.7764, F1=0.7764

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 50, Split 5...


Map: 100%|██████████| 50/50 [00:00<00:00, 6795.48 examples/s]
Map: 100%|██████████| 10/10 [00:00<00:00, 3495.84 examples/s]
Map: 100%|██████████| 1188/1188 [00:00<00:00, 9108.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 13.56it/s, loss=0.758]


Epoch 1 Loss: 10.5338
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 12.02it/s, loss=0.739]


Epoch 2 Loss: 6.0666
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 13.38it/s, loss=0.564]


Epoch 3 Loss: 5.1105
Test Metrics: Precision=0.7776, Recall=0.7776, F1=0.7776

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 55, Split 1...


Map: 100%|██████████| 55/55 [00:00<00:00, 6640.38 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 3567.96 examples/s]
Map: 100%|██████████| 1182/1182 [00:00<00:00, 9234.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 11.36it/s, loss=1.08] 


Epoch 1 Loss: 9.3901
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 11.55it/s, loss=0.825]


Epoch 2 Loss: 6.2628
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 11.52it/s, loss=0.898]


Epoch 3 Loss: 5.8277
Test Metrics: Precision=0.7773, Recall=0.7773, F1=0.7773

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 55, Split 2...


Map: 100%|██████████| 55/55 [00:00<00:00, 6444.66 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 2914.92 examples/s]
Map: 100%|██████████| 1182/1182 [00:00<00:00, 8959.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 10.87it/s, loss=1.14] 


Epoch 1 Loss: 10.4877
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 10.68it/s, loss=0.715]


Epoch 2 Loss: 6.4285
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 11.05it/s, loss=0.75] 


Epoch 3 Loss: 5.6298
Test Metrics: Precision=0.7787, Recall=0.7787, F1=0.7787

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 55, Split 3...


Map: 100%|██████████| 55/55 [00:00<00:00, 7848.36 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 3147.16 examples/s]
Map: 100%|██████████| 1182/1182 [00:00<00:00, 9052.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 13.60it/s, loss=1.23] 


Epoch 1 Loss: 9.6791
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 13.79it/s, loss=0.587]


Epoch 2 Loss: 5.8735
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 14.58it/s, loss=0.713]


Epoch 3 Loss: 4.8556
Test Metrics: Precision=0.7772, Recall=0.7772, F1=0.7772

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 55, Split 4...


Map: 100%|██████████| 55/55 [00:00<00:00, 6728.31 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 3296.00 examples/s]
Map: 100%|██████████| 1182/1182 [00:00<00:00, 9083.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00,  9.86it/s, loss=0.837]


Epoch 1 Loss: 9.2885
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00,  9.79it/s, loss=0.688]


Epoch 2 Loss: 5.5507
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 10.47it/s, loss=0.602]


Epoch 3 Loss: 4.7398
Test Metrics: Precision=0.7808, Recall=0.7808, F1=0.7808

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 55, Split 5...


Map: 100%|██████████| 55/55 [00:00<00:00, 6746.61 examples/s]
Map: 100%|██████████| 11/11 [00:00<00:00, 2768.35 examples/s]
Map: 100%|██████████| 1182/1182 [00:00<00:00, 9073.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 7/7 [00:00<00:00, 11.24it/s, loss=0.725]


Epoch 1 Loss: 9.5397
Epoch 2/3


Training Epoch 2: 100%|██████████| 7/7 [00:00<00:00, 10.95it/s, loss=0.836]


Epoch 2 Loss: 5.9254
Epoch 3/3


Training Epoch 3: 100%|██████████| 7/7 [00:00<00:00, 12.10it/s, loss=0.549]


Epoch 3 Loss: 5.0218
Test Metrics: Precision=0.7796, Recall=0.7796, F1=0.7796

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 1...


Map: 100%|██████████| 60/60 [00:00<00:00, 6753.21 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3608.78 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 3738.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 11.18it/s, loss=0.992]


Epoch 1 Loss: 10.7729
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.36it/s, loss=0.599]


Epoch 2 Loss: 6.2038
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 11.16it/s, loss=0.414]


Epoch 3 Loss: 5.2649
Test Metrics: Precision=0.7821, Recall=0.7821, F1=0.7821

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 2...


Map: 100%|██████████| 60/60 [00:00<00:00, 7035.26 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3349.41 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 8895.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 11.42it/s, loss=1.14]


Epoch 1 Loss: 11.4540
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.00it/s, loss=1.03] 


Epoch 2 Loss: 6.8882
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 11.56it/s, loss=0.577]


Epoch 3 Loss: 5.6543
Test Metrics: Precision=0.7815, Recall=0.7815, F1=0.7815

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 3...


Map: 100%|██████████| 60/60 [00:00<00:00, 7479.14 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3596.92 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 8921.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 13.94it/s, loss=0.992]


Epoch 1 Loss: 10.0632
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 13.59it/s, loss=0.702]


Epoch 2 Loss: 5.9426
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 14.26it/s, loss=0.49] 


Epoch 3 Loss: 4.9762
Test Metrics: Precision=0.7965, Recall=0.7965, F1=0.7965

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 4...


Map: 100%|██████████| 60/60 [00:00<00:00, 6992.84 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3276.16 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9324.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00,  9.84it/s, loss=1.17] 


Epoch 1 Loss: 11.0934
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 10.32it/s, loss=0.807]


Epoch 2 Loss: 6.2852
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 10.64it/s, loss=0.592]


Epoch 3 Loss: 5.3496
Test Metrics: Precision=0.7778, Recall=0.7778, F1=0.7778

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 5...


Map: 100%|██████████| 60/60 [00:00<00:00, 7015.84 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3086.32 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9119.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 12.10it/s, loss=1.38] 


Epoch 1 Loss: 10.7289
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 12.27it/s, loss=0.881]


Epoch 2 Loss: 6.7701
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 12.33it/s, loss=0.518]


Epoch 3 Loss: 5.6471
Test Metrics: Precision=0.7827, Recall=0.7827, F1=0.7827

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 65, Split 1...


Map: 100%|██████████| 65/65 [00:00<00:00, 6607.92 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 3503.56 examples/s]
Map: 100%|██████████| 1170/1170 [00:00<00:00, 8754.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 11.60it/s, loss=0.634]


Epoch 1 Loss: 12.1254
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 11.95it/s, loss=0.809]


Epoch 2 Loss: 6.5281
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 12.06it/s, loss=0.358]


Epoch 3 Loss: 5.2458
Test Metrics: Precision=0.7838, Recall=0.7838, F1=0.7838

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 65, Split 2...


Map: 100%|██████████| 65/65 [00:00<00:00, 6957.15 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 4274.53 examples/s]
Map: 100%|██████████| 1170/1170 [00:00<00:00, 8842.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 11.61it/s, loss=1.1]  


Epoch 1 Loss: 11.7955
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 11.67it/s, loss=0.729]


Epoch 2 Loss: 6.9014
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 11.78it/s, loss=0.476]


Epoch 3 Loss: 6.1626
Test Metrics: Precision=0.7953, Recall=0.7953, F1=0.7953

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 65, Split 3...


Map: 100%|██████████| 65/65 [00:00<00:00, 6995.89 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 3175.47 examples/s]
Map: 100%|██████████| 1170/1170 [00:00<00:00, 8818.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 13.78it/s, loss=1.25] 


Epoch 1 Loss: 11.8495
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 14.35it/s, loss=0.547]


Epoch 2 Loss: 6.8580
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 15.18it/s, loss=0.166]


Epoch 3 Loss: 5.4369
Test Metrics: Precision=0.7922, Recall=0.7922, F1=0.7922

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 65, Split 4...


Map: 100%|██████████| 65/65 [00:00<00:00, 7025.63 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 3316.26 examples/s]
Map: 100%|██████████| 1170/1170 [00:00<00:00, 9116.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 11.68it/s, loss=1]    


Epoch 1 Loss: 10.7497
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 10.99it/s, loss=0.719]


Epoch 2 Loss: 6.2821
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 12.23it/s, loss=0.57] 


Epoch 3 Loss: 5.1189
Test Metrics: Precision=0.8051, Recall=0.8051, F1=0.8051

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 65, Split 5...


Map: 100%|██████████| 65/65 [00:00<00:00, 6616.26 examples/s]
Map: 100%|██████████| 13/13 [00:00<00:00, 3628.29 examples/s]
Map: 100%|██████████| 1170/1170 [00:00<00:00, 9329.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 11.88it/s, loss=0.494]


Epoch 1 Loss: 11.0978
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 11.60it/s, loss=0.826]


Epoch 2 Loss: 7.2171
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 11.10it/s, loss=1.28] 


Epoch 3 Loss: 6.1659
Test Metrics: Precision=0.7975, Recall=0.7975, F1=0.7975

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 70, Split 1...


Map: 100%|██████████| 70/70 [00:00<00:00, 6915.59 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 3680.83 examples/s]
Map: 100%|██████████| 1164/1164 [00:00<00:00, 9311.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 12.07it/s, loss=0.996]


Epoch 1 Loss: 12.1158
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 11.86it/s, loss=0.73] 


Epoch 2 Loss: 7.4568
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 11.65it/s, loss=0.452]


Epoch 3 Loss: 6.1103
Test Metrics: Precision=0.7772, Recall=0.7772, F1=0.7772

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 70, Split 2...


Map: 100%|██████████| 70/70 [00:00<00:00, 7220.18 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 3697.52 examples/s]
Map: 100%|██████████| 1164/1164 [00:00<00:00, 9235.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 10.66it/s, loss=1.02] 


Epoch 1 Loss: 12.1980
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 11.53it/s, loss=0.72] 


Epoch 2 Loss: 7.5704
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 11.34it/s, loss=0.607]


Epoch 3 Loss: 6.7272
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 70, Split 3...


Map: 100%|██████████| 70/70 [00:00<00:00, 7855.55 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 4372.65 examples/s]
Map: 100%|██████████| 1164/1164 [00:00<00:00, 9155.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 13.29it/s, loss=0.881]


Epoch 1 Loss: 11.9923
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 13.07it/s, loss=0.715]


Epoch 2 Loss: 6.5328
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 13.37it/s, loss=0.592]


Epoch 3 Loss: 5.4348
Test Metrics: Precision=0.8271, Recall=0.8271, F1=0.8271

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 70, Split 4...


Map: 100%|██████████| 70/70 [00:00<00:00, 7546.04 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 3755.93 examples/s]
Map: 100%|██████████| 1164/1164 [00:00<00:00, 9075.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 10.66it/s, loss=0.91] 


Epoch 1 Loss: 11.9271
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 10.61it/s, loss=0.948]


Epoch 2 Loss: 6.8329
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 10.13it/s, loss=0.63] 


Epoch 3 Loss: 5.4602
Test Metrics: Precision=0.8011, Recall=0.8011, F1=0.8011

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 70, Split 5...


Map: 100%|██████████| 70/70 [00:00<00:00, 6828.25 examples/s]
Map: 100%|██████████| 14/14 [00:00<00:00, 3877.20 examples/s]
Map: 100%|██████████| 1164/1164 [00:00<00:00, 3732.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 9/9 [00:00<00:00, 10.53it/s, loss=0.617]


Epoch 1 Loss: 11.7625
Epoch 2/3


Training Epoch 2: 100%|██████████| 9/9 [00:00<00:00, 11.05it/s, loss=0.917]


Epoch 2 Loss: 7.0569
Epoch 3/3


Training Epoch 3: 100%|██████████| 9/9 [00:00<00:00, 11.22it/s, loss=0.444]


Epoch 3 Loss: 5.8339
Test Metrics: Precision=0.7854, Recall=0.7854, F1=0.7854

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 75, Split 1...


Map: 100%|██████████| 75/75 [00:00<00:00, 7581.71 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 4415.99 examples/s]
Map: 100%|██████████| 1158/1158 [00:00<00:00, 9263.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.79it/s, loss=0.86]


Epoch 1 Loss: 12.0993
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 12.62it/s, loss=0.36]


Epoch 2 Loss: 7.1131
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 12.30it/s, loss=0.532]


Epoch 3 Loss: 5.6712
Test Metrics: Precision=0.8207, Recall=0.8207, F1=0.8207

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 75, Split 2...


Map: 100%|██████████| 75/75 [00:00<00:00, 7351.21 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 3671.91 examples/s]
Map: 100%|██████████| 1158/1158 [00:00<00:00, 9303.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.18it/s, loss=0.743]


Epoch 1 Loss: 11.8664
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.45it/s, loss=0.711]


Epoch 2 Loss: 7.4012
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 12.02it/s, loss=1.1] 


Epoch 3 Loss: 6.6560
Test Metrics: Precision=0.8283, Recall=0.8283, F1=0.8283

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 75, Split 3...


Map: 100%|██████████| 75/75 [00:00<00:00, 8177.31 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 3612.46 examples/s]
Map: 100%|██████████| 1158/1158 [00:00<00:00, 9114.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 14.19it/s, loss=0.915]


Epoch 1 Loss: 12.2560
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 13.67it/s, loss=0.547]


Epoch 2 Loss: 6.4889
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 13.73it/s, loss=0.465]


Epoch 3 Loss: 5.5448
Test Metrics: Precision=0.8443, Recall=0.8443, F1=0.8443

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 75, Split 4...


Map: 100%|██████████| 75/75 [00:00<00:00, 7390.41 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 4263.95 examples/s]
Map: 100%|██████████| 1158/1158 [00:00<00:00, 9275.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.37it/s, loss=1.11]


Epoch 1 Loss: 12.7559
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.24it/s, loss=0.533]


Epoch 2 Loss: 7.1101
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 10.63it/s, loss=0.504]


Epoch 3 Loss: 5.7878
Test Metrics: Precision=0.8144, Recall=0.8144, F1=0.8144

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 75, Split 5...


Map: 100%|██████████| 75/75 [00:00<00:00, 7071.44 examples/s]
Map: 100%|██████████| 15/15 [00:00<00:00, 4321.05 examples/s]
Map: 100%|██████████| 1158/1158 [00:00<00:00, 9038.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.46it/s, loss=1.02]


Epoch 1 Loss: 12.0160
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.61it/s, loss=0.691]


Epoch 2 Loss: 8.1284
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.78it/s, loss=0.44]


Epoch 3 Loss: 6.6863
Test Metrics: Precision=0.7775, Recall=0.7775, F1=0.7775

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 1...


Map: 100%|██████████| 80/80 [00:00<00:00, 7765.61 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4803.10 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9434.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 10.98it/s, loss=0.698]


Epoch 1 Loss: 11.4452
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.20it/s, loss=0.682]


Epoch 2 Loss: 6.9291
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.61it/s, loss=0.591]


Epoch 3 Loss: 5.7287
Test Metrics: Precision=0.8302, Recall=0.8302, F1=0.8302

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 2...


Map: 100%|██████████| 80/80 [00:00<00:00, 7191.42 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3396.89 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9327.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.21it/s, loss=0.692]


Epoch 1 Loss: 12.4579
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 10.53it/s, loss=0.744]


Epoch 2 Loss: 7.5983
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 10.91it/s, loss=0.577]


Epoch 3 Loss: 6.3459
Test Metrics: Precision=0.8181, Recall=0.8181, F1=0.8181

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 3...


Map: 100%|██████████| 80/80 [00:00<00:00, 8280.55 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3426.37 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9067.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 13.06it/s, loss=0.675]


Epoch 1 Loss: 13.5197
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 14.46it/s, loss=0.532]


Epoch 2 Loss: 7.4618
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 13.01it/s, loss=0.597]


Epoch 3 Loss: 6.0421
Test Metrics: Precision=0.8177, Recall=0.8177, F1=0.8177

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 4...


Map: 100%|██████████| 80/80 [00:00<00:00, 7414.03 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3375.02 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9192.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 10.90it/s, loss=0.902]


Epoch 1 Loss: 12.1877
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.95it/s, loss=0.556]


Epoch 2 Loss: 6.7628
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.09it/s, loss=0.604]


Epoch 3 Loss: 5.3728
Test Metrics: Precision=0.8299, Recall=0.8299, F1=0.8299

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 5...


Map: 100%|██████████| 80/80 [00:00<00:00, 7078.55 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3764.45 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9157.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 10.80it/s, loss=0.847]


Epoch 1 Loss: 11.7069
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 10.88it/s, loss=0.589]


Epoch 2 Loss: 7.3024
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.16it/s, loss=0.604]


Epoch 3 Loss: 6.0560
Test Metrics: Precision=0.8161, Recall=0.8161, F1=0.8161

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 85, Split 1...


Map: 100%|██████████| 85/85 [00:00<00:00, 7821.42 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 5156.81 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 9267.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 11/11 [00:00<00:00, 11.66it/s, loss=0.783]


Epoch 1 Loss: 12.5907
Epoch 2/3


Training Epoch 2: 100%|██████████| 11/11 [00:00<00:00, 11.83it/s, loss=0.598]


Epoch 2 Loss: 7.1604
Epoch 3/3


Training Epoch 3: 100%|██████████| 11/11 [00:00<00:00, 11.83it/s, loss=0.493]


Epoch 3 Loss: 5.5570
Test Metrics: Precision=0.8479, Recall=0.8479, F1=0.8479

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 85, Split 2...


Map: 100%|██████████| 85/85 [00:00<00:00, 7770.45 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 3811.98 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 9270.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 11/11 [00:00<00:00, 11.77it/s, loss=0.541]


Epoch 1 Loss: 13.8820
Epoch 2/3


Training Epoch 2: 100%|██████████| 11/11 [00:00<00:00, 11.12it/s, loss=0.769]


Epoch 2 Loss: 8.0144
Epoch 3/3


Training Epoch 3: 100%|██████████| 11/11 [00:00<00:00, 13.46it/s, loss=0.639]


Epoch 3 Loss: 6.5768
Test Metrics: Precision=0.8315, Recall=0.8315, F1=0.8315

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 85, Split 3...


Map: 100%|██████████| 85/85 [00:00<00:00, 8228.11 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 4049.48 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 8911.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 11/11 [00:00<00:00, 12.89it/s, loss=0.833]


Epoch 1 Loss: 12.8201
Epoch 2/3


Training Epoch 2: 100%|██████████| 11/11 [00:00<00:00, 14.08it/s, loss=0.716]


Epoch 2 Loss: 7.9183
Epoch 3/3


Training Epoch 3: 100%|██████████| 11/11 [00:00<00:00, 12.75it/s, loss=0.677]


Epoch 3 Loss: 6.1536
Test Metrics: Precision=0.8362, Recall=0.8362, F1=0.8362

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 85, Split 4...


Map: 100%|██████████| 85/85 [00:00<00:00, 7645.30 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 3262.85 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 9146.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 11/11 [00:00<00:00, 11.23it/s, loss=0.75] 


Epoch 1 Loss: 13.2994
Epoch 2/3


Training Epoch 2: 100%|██████████| 11/11 [00:01<00:00, 10.75it/s, loss=0.604]


Epoch 2 Loss: 7.0857
Epoch 3/3


Training Epoch 3: 100%|██████████| 11/11 [00:01<00:00, 10.84it/s, loss=0.518]


Epoch 3 Loss: 5.4973
Test Metrics: Precision=0.8534, Recall=0.8534, F1=0.8534

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 85, Split 5...


Map: 100%|██████████| 85/85 [00:00<00:00, 7165.00 examples/s]
Map: 100%|██████████| 17/17 [00:00<00:00, 4018.89 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 9294.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 11/11 [00:00<00:00, 11.93it/s, loss=0.711]


Epoch 1 Loss: 13.0295
Epoch 2/3


Training Epoch 2: 100%|██████████| 11/11 [00:01<00:00, 10.90it/s, loss=0.519]


Epoch 2 Loss: 7.2911
Epoch 3/3


Training Epoch 3: 100%|██████████| 11/11 [00:00<00:00, 11.15it/s, loss=0.625]


Epoch 3 Loss: 6.0358
Test Metrics: Precision=0.8419, Recall=0.8419, F1=0.8419

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 90, Split 1...


Map: 100%|██████████| 90/90 [00:00<00:00, 8218.75 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 4165.15 examples/s]
Map: 100%|██████████| 1140/1140 [00:00<00:00, 9357.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:00<00:00, 12.12it/s, loss=0.639]


Epoch 1 Loss: 13.9020
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:00<00:00, 12.22it/s, loss=0.538]


Epoch 2 Loss: 8.1279
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 11.62it/s, loss=0.564]


Epoch 3 Loss: 6.5012
Test Metrics: Precision=0.8387, Recall=0.8387, F1=0.8387

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 90, Split 2...


Map: 100%|██████████| 90/90 [00:00<00:00, 7506.06 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 3667.95 examples/s]
Map: 100%|██████████| 1140/1140 [00:00<00:00, 9353.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.20it/s, loss=0.744]


Epoch 1 Loss: 13.6472
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:00<00:00, 12.51it/s, loss=0.391]


Epoch 2 Loss: 8.3372
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:00<00:00, 12.25it/s, loss=0.451]


Epoch 3 Loss: 6.7185
Test Metrics: Precision=0.8254, Recall=0.8254, F1=0.8254

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 90, Split 3...


Map: 100%|██████████| 90/90 [00:00<00:00, 8536.96 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 5662.88 examples/s]
Map: 100%|██████████| 1140/1140 [00:00<00:00, 9411.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:00<00:00, 13.48it/s, loss=0.315]


Epoch 1 Loss: 13.5172
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:00<00:00, 13.21it/s, loss=0.491]


Epoch 2 Loss: 7.4654
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:00<00:00, 13.78it/s, loss=0.427]


Epoch 3 Loss: 6.1276
Test Metrics: Precision=0.8499, Recall=0.8499, F1=0.8499

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 90, Split 4...


Map: 100%|██████████| 90/90 [00:00<00:00, 7688.13 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 3296.11 examples/s]
Map: 100%|██████████| 1140/1140 [00:00<00:00, 9447.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.13it/s, loss=0.538]


Epoch 1 Loss: 14.1323
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:01<00:00, 11.64it/s, loss=0.544]


Epoch 2 Loss: 7.6124
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 11.39it/s, loss=0.363]


Epoch 3 Loss: 5.9522
Test Metrics: Precision=0.8489, Recall=0.8489, F1=0.8489

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 90, Split 5...


Map: 100%|██████████| 90/90 [00:00<00:00, 7399.10 examples/s]
Map: 100%|██████████| 18/18 [00:00<00:00, 4269.49 examples/s]
Map: 100%|██████████| 1140/1140 [00:00<00:00, 9187.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.34it/s, loss=0.642]


Epoch 1 Loss: 13.9178
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:01<00:00, 11.37it/s, loss=1.32] 


Epoch 2 Loss: 8.9304
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 11.00it/s, loss=0.758]


Epoch 3 Loss: 7.0812
Test Metrics: Precision=0.8293, Recall=0.8293, F1=0.8293

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 95, Split 1...


Map: 100%|██████████| 95/95 [00:00<00:00, 8047.07 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 4233.29 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 9290.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.33it/s, loss=0.766]


Epoch 1 Loss: 13.6444
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:01<00:00, 11.61it/s, loss=0.667]


Epoch 2 Loss: 7.8119
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 11.83it/s, loss=0.533]


Epoch 3 Loss: 6.1018
Test Metrics: Precision=0.8423, Recall=0.8423, F1=0.8423

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 95, Split 2...


Map: 100%|██████████| 95/95 [00:00<00:00, 7885.90 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 4495.00 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 9434.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.08it/s, loss=1.01] 


Epoch 1 Loss: 15.2729
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:01<00:00, 11.50it/s, loss=0.53] 


Epoch 2 Loss: 8.1199
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 10.85it/s, loss=0.512]


Epoch 3 Loss: 6.7767
Test Metrics: Precision=0.8454, Recall=0.8454, F1=0.8454

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 95, Split 3...


Map: 100%|██████████| 95/95 [00:00<00:00, 8667.80 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 3398.80 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 9431.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:00<00:00, 13.35it/s, loss=0.744]


Epoch 1 Loss: 13.1042
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:00<00:00, 13.00it/s, loss=0.668]


Epoch 2 Loss: 7.3337
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:00<00:00, 12.80it/s, loss=0.508]


Epoch 3 Loss: 5.9656
Test Metrics: Precision=0.8587, Recall=0.8587, F1=0.8587

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 95, Split 4...


Map: 100%|██████████| 95/95 [00:00<00:00, 8150.28 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 4790.61 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 9338.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.06it/s, loss=0.768]


Epoch 1 Loss: 14.1503
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:01<00:00, 11.10it/s, loss=0.534]


Epoch 2 Loss: 7.6226
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 10.95it/s, loss=0.44] 


Epoch 3 Loss: 5.8866
Test Metrics: Precision=0.8468, Recall=0.8468, F1=0.8468

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 95, Split 5...


Map: 100%|██████████| 95/95 [00:00<00:00, 7896.53 examples/s]
Map: 100%|██████████| 19/19 [00:00<00:00, 4696.04 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 9184.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 12/12 [00:01<00:00, 11.43it/s, loss=0.881]


Epoch 1 Loss: 13.9641
Epoch 2/3


Training Epoch 2: 100%|██████████| 12/12 [00:01<00:00, 11.84it/s, loss=0.446]


Epoch 2 Loss: 8.0337
Epoch 3/3


Training Epoch 3: 100%|██████████| 12/12 [00:01<00:00, 11.12it/s, loss=0.459]


Epoch 3 Loss: 6.4502
Test Metrics: Precision=0.8422, Recall=0.8422, F1=0.8422

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 1...


Map: 100%|██████████| 100/100 [00:00<00:00, 8167.27 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4494.78 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9394.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.01it/s, loss=0.602]


Epoch 1 Loss: 13.1522
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.67it/s, loss=0.864]


Epoch 2 Loss: 7.9535
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.45it/s, loss=0.336]


Epoch 3 Loss: 6.3602
Test Metrics: Precision=0.8484, Recall=0.8484, F1=0.8484

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 2...


Map: 100%|██████████| 100/100 [00:00<00:00, 7881.07 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 3764.41 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9096.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.04it/s, loss=0.663]


Epoch 1 Loss: 14.5969
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.72it/s, loss=0.466]


Epoch 2 Loss: 8.3252
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.56it/s, loss=0.759]


Epoch 3 Loss: 7.1293
Test Metrics: Precision=0.8537, Recall=0.8537, F1=0.8537

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 3...


Map: 100%|██████████| 100/100 [00:00<00:00, 8562.08 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 5288.83 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9012.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:00<00:00, 13.29it/s, loss=1.17] 


Epoch 1 Loss: 14.5912
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 12.87it/s, loss=0.465]


Epoch 2 Loss: 8.2956
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:00<00:00, 13.34it/s, loss=0.414]


Epoch 3 Loss: 6.3436
Test Metrics: Precision=0.8513, Recall=0.8513, F1=0.8513

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 4...


Map: 100%|██████████| 100/100 [00:00<00:00, 8234.62 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4149.49 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9429.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 10.79it/s, loss=1.21] 


Epoch 1 Loss: 14.2147
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.67it/s, loss=0.592]


Epoch 2 Loss: 7.6813
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.81it/s, loss=0.354]


Epoch 3 Loss: 5.9984
Test Metrics: Precision=0.8504, Recall=0.8504, F1=0.8504

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 5...


Map: 100%|██████████| 100/100 [00:00<00:00, 7794.80 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 5441.14 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9146.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.21it/s, loss=0.799]


Epoch 1 Loss: 13.9283
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.53it/s, loss=0.462]


Epoch 2 Loss: 8.0001
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.26it/s, loss=0.383]


Epoch 3 Loss: 6.5964
Test Metrics: Precision=0.8500, Recall=0.8500, F1=0.8500

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 105, Split 1...


Map: 100%|██████████| 105/105 [00:00<00:00, 8295.07 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 4506.77 examples/s]
Map: 100%|██████████| 1122/1122 [00:00<00:00, 9244.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.95it/s, loss=0.539]


Epoch 1 Loss: 15.6685
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.61it/s, loss=0.466]


Epoch 2 Loss: 8.6376
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.93it/s, loss=0.458]


Epoch 3 Loss: 6.9330
Test Metrics: Precision=0.8550, Recall=0.8550, F1=0.8550

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 105, Split 2...


Map: 100%|██████████| 105/105 [00:00<00:00, 7956.39 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 4674.44 examples/s]
Map: 100%|██████████| 1122/1122 [00:00<00:00, 9448.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.40it/s, loss=1.34] 


Epoch 1 Loss: 16.3864
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.52it/s, loss=0.636]


Epoch 2 Loss: 10.3894
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 12.44it/s, loss=1.42] 


Epoch 3 Loss: 8.8946
Test Metrics: Precision=0.8360, Recall=0.8360, F1=0.8360

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 105, Split 3...


Map: 100%|██████████| 105/105 [00:00<00:00, 8036.97 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 5681.14 examples/s]
Map: 100%|██████████| 1122/1122 [00:00<00:00, 9137.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 13.42it/s, loss=1.2]  


Epoch 1 Loss: 15.1682
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 13.11it/s, loss=0.253]


Epoch 2 Loss: 8.6610
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 13.63it/s, loss=0.553]


Epoch 3 Loss: 7.0956
Test Metrics: Precision=0.8562, Recall=0.8562, F1=0.8562

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 105, Split 4...


Map: 100%|██████████| 105/105 [00:00<00:00, 8174.06 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 3891.68 examples/s]
Map: 100%|██████████| 1122/1122 [00:00<00:00, 9423.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.20it/s, loss=0.798]


Epoch 1 Loss: 14.7533
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.73it/s, loss=0.618]


Epoch 2 Loss: 7.8026
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.46it/s, loss=0.131]


Epoch 3 Loss: 5.6808
Test Metrics: Precision=0.8675, Recall=0.8675, F1=0.8675

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 105, Split 5...


Map: 100%|██████████| 105/105 [00:00<00:00, 7753.42 examples/s]
Map: 100%|██████████| 21/21 [00:00<00:00, 4210.75 examples/s]
Map: 100%|██████████| 1122/1122 [00:00<00:00, 9453.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.35it/s, loss=0.758]


Epoch 1 Loss: 16.6939
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.56it/s, loss=0.523]


Epoch 2 Loss: 8.3317
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.58it/s, loss=0.602]


Epoch 3 Loss: 6.8051
Test Metrics: Precision=0.8536, Recall=0.8536, F1=0.8536

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 110, Split 1...


Map: 100%|██████████| 110/110 [00:00<00:00, 8035.21 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 5414.55 examples/s]
Map: 100%|██████████| 1116/1116 [00:00<00:00, 9202.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.10it/s, loss=0.672]


Epoch 1 Loss: 13.3229
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 12.10it/s, loss=0.718]


Epoch 2 Loss: 7.7114
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.12it/s, loss=0.297]


Epoch 3 Loss: 5.9822
Test Metrics: Precision=0.8575, Recall=0.8575, F1=0.8575

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 110, Split 2...


Map: 100%|██████████| 110/110 [00:00<00:00, 8018.45 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 4601.54 examples/s]
Map: 100%|██████████| 1116/1116 [00:00<00:00, 9426.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 10.80it/s, loss=0.407]


Epoch 1 Loss: 14.6616
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.64it/s, loss=0.56] 


Epoch 2 Loss: 8.2054
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.09it/s, loss=0.482]


Epoch 3 Loss: 6.6564
Test Metrics: Precision=0.8564, Recall=0.8564, F1=0.8564

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 110, Split 3...


Map: 100%|██████████| 110/110 [00:00<00:00, 8873.59 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 4633.19 examples/s]
Map: 100%|██████████| 1116/1116 [00:00<00:00, 9152.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 12.70it/s, loss=0.896]


Epoch 1 Loss: 15.6982
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 13.20it/s, loss=0.452]


Epoch 2 Loss: 8.5907
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 12.59it/s, loss=0.404]


Epoch 3 Loss: 6.7579
Test Metrics: Precision=0.8558, Recall=0.8558, F1=0.8558

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 110, Split 4...


Map: 100%|██████████| 110/110 [00:00<00:00, 8259.46 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 4401.16 examples/s]
Map: 100%|██████████| 1116/1116 [00:00<00:00, 9170.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.44it/s, loss=0.803]


Epoch 1 Loss: 15.4998
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.11it/s, loss=0.37] 


Epoch 2 Loss: 8.0552
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.61it/s, loss=0.507]


Epoch 3 Loss: 6.3968
Test Metrics: Precision=0.8565, Recall=0.8565, F1=0.8565

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 110, Split 5...


Map: 100%|██████████| 110/110 [00:00<00:00, 7823.07 examples/s]
Map: 100%|██████████| 22/22 [00:00<00:00, 4828.10 examples/s]
Map: 100%|██████████| 1116/1116 [00:00<00:00, 9460.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 14/14 [00:01<00:00, 11.07it/s, loss=0.752]


Epoch 1 Loss: 16.1334
Epoch 2/3


Training Epoch 2: 100%|██████████| 14/14 [00:01<00:00, 11.43it/s, loss=0.475]


Epoch 2 Loss: 8.1504
Epoch 3/3


Training Epoch 3: 100%|██████████| 14/14 [00:01<00:00, 11.78it/s, loss=0.603]


Epoch 3 Loss: 6.9447
Test Metrics: Precision=0.8541, Recall=0.8541, F1=0.8541

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 115, Split 1...


Map: 100%|██████████| 115/115 [00:00<00:00, 8112.50 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 4639.72 examples/s]
Map: 100%|██████████| 1110/1110 [00:00<00:00, 9316.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.40it/s, loss=0.602]


Epoch 1 Loss: 15.6760
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.42it/s, loss=0.444]


Epoch 2 Loss: 8.4613
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.72it/s, loss=0.557]


Epoch 3 Loss: 6.8524
Test Metrics: Precision=0.8589, Recall=0.8589, F1=0.8589

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 115, Split 2...


Map: 100%|██████████| 115/115 [00:00<00:00, 8254.10 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 4324.99 examples/s]
Map: 100%|██████████| 1110/1110 [00:00<00:00, 9406.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.59it/s, loss=0.653]


Epoch 1 Loss: 15.9222
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.67it/s, loss=0.445]


Epoch 2 Loss: 8.8023
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 12.32it/s, loss=0.3]  


Epoch 3 Loss: 7.1640
Test Metrics: Precision=0.8574, Recall=0.8574, F1=0.8574

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 115, Split 3...


Map: 100%|██████████| 115/115 [00:00<00:00, 8566.34 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 4836.75 examples/s]
Map: 100%|██████████| 1110/1110 [00:00<00:00, 9275.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 13.10it/s, loss=0.992]


Epoch 1 Loss: 17.3897
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 12.47it/s, loss=0.459]


Epoch 2 Loss: 9.7635
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 12.96it/s, loss=0.555]


Epoch 3 Loss: 7.8269
Test Metrics: Precision=0.8494, Recall=0.8494, F1=0.8494

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 115, Split 4...


Map: 100%|██████████| 115/115 [00:00<00:00, 8725.65 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 4810.22 examples/s]
Map: 100%|██████████| 1110/1110 [00:00<00:00, 9315.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 12.00it/s, loss=0.477]


Epoch 1 Loss: 16.1111
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 12.26it/s, loss=0.599]


Epoch 2 Loss: 8.5487
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.41it/s, loss=0.368]


Epoch 3 Loss: 6.5170
Test Metrics: Precision=0.8575, Recall=0.8575, F1=0.8575

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 115, Split 5...


Map: 100%|██████████| 115/115 [00:00<00:00, 8156.26 examples/s]
Map: 100%|██████████| 23/23 [00:00<00:00, 5632.24 examples/s]
Map: 100%|██████████| 1110/1110 [00:00<00:00, 9345.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.72it/s, loss=1.09] 


Epoch 1 Loss: 16.7965
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.87it/s, loss=0.707]


Epoch 2 Loss: 9.7031
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.73it/s, loss=0.384]


Epoch 3 Loss: 7.5561
Test Metrics: Precision=0.8511, Recall=0.8511, F1=0.8511

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 1...


Map: 100%|██████████| 120/120 [00:00<00:00, 8011.15 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4641.21 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9320.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 10.51it/s, loss=0.821]


Epoch 1 Loss: 16.5337
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.28it/s, loss=0.68] 


Epoch 2 Loss: 8.8710
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.84it/s, loss=0.446]


Epoch 3 Loss: 6.9789
Test Metrics: Precision=0.8572, Recall=0.8572, F1=0.8572

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 2...


Map: 100%|██████████| 120/120 [00:00<00:00, 8264.23 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 5182.69 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9299.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.34it/s, loss=0.562]


Epoch 1 Loss: 15.9941
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.56it/s, loss=0.527]


Epoch 2 Loss: 8.7725
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.41it/s, loss=0.584]


Epoch 3 Loss: 7.0691
Test Metrics: Precision=0.8616, Recall=0.8616, F1=0.8616

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 3...


Map: 100%|██████████| 120/120 [00:00<00:00, 8479.62 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4065.07 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 3638.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 12.17it/s, loss=0.694]


Epoch 1 Loss: 16.0255
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.56it/s, loss=0.533]


Epoch 2 Loss: 8.7030
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 12.24it/s, loss=0.558]


Epoch 3 Loss: 6.7751
Test Metrics: Precision=0.8678, Recall=0.8678, F1=0.8678

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 4...


Map: 100%|██████████| 120/120 [00:00<00:00, 8639.74 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 5500.43 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9193.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.24it/s, loss=0.915]


Epoch 1 Loss: 16.7398
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.19it/s, loss=0.477]


Epoch 2 Loss: 8.6639
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.47it/s, loss=0.524]


Epoch 3 Loss: 6.7498
Test Metrics: Precision=0.8570, Recall=0.8570, F1=0.8570

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 5...


Map: 100%|██████████| 120/120 [00:00<00:00, 7990.67 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 6329.83 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9345.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.28it/s, loss=0.573]


Epoch 1 Loss: 15.8706
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.50it/s, loss=0.651]


Epoch 2 Loss: 9.0243
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.21it/s, loss=0.352]


Epoch 3 Loss: 7.0144
Test Metrics: Precision=0.8522, Recall=0.8522, F1=0.8522

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 125, Split 1...


Map: 100%|██████████| 125/125 [00:00<00:00, 8124.34 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 5053.13 examples/s]
Map: 100%|██████████| 1098/1098 [00:00<00:00, 9284.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 16/16 [00:01<00:00, 11.62it/s, loss=0.696]


Epoch 1 Loss: 15.9022
Epoch 2/3


Training Epoch 2: 100%|██████████| 16/16 [00:01<00:00, 12.17it/s, loss=0.421]


Epoch 2 Loss: 8.8966
Epoch 3/3


Training Epoch 3: 100%|██████████| 16/16 [00:01<00:00, 11.60it/s, loss=0.471]


Epoch 3 Loss: 6.8923
Test Metrics: Precision=0.8613, Recall=0.8613, F1=0.8613

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 125, Split 2...


Map: 100%|██████████| 125/125 [00:00<00:00, 8193.79 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 5683.95 examples/s]
Map: 100%|██████████| 1098/1098 [00:00<00:00, 9315.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 16/16 [00:01<00:00, 11.14it/s, loss=0.405]


Epoch 1 Loss: 17.0516
Epoch 2/3


Training Epoch 2: 100%|██████████| 16/16 [00:01<00:00, 11.53it/s, loss=0.571]


Epoch 2 Loss: 9.9021
Epoch 3/3


Training Epoch 3: 100%|██████████| 16/16 [00:01<00:00, 11.17it/s, loss=0.45] 


Epoch 3 Loss: 7.8193
Test Metrics: Precision=0.8521, Recall=0.8521, F1=0.8521

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 125, Split 3...


Map: 100%|██████████| 125/125 [00:00<00:00, 8411.35 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 4488.19 examples/s]
Map: 100%|██████████| 1098/1098 [00:00<00:00, 9195.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 16/16 [00:01<00:00, 12.22it/s, loss=0.612]


Epoch 1 Loss: 17.8040
Epoch 2/3


Training Epoch 2: 100%|██████████| 16/16 [00:01<00:00, 12.45it/s, loss=0.695]


Epoch 2 Loss: 8.8354
Epoch 3/3


Training Epoch 3: 100%|██████████| 16/16 [00:01<00:00, 12.20it/s, loss=0.392]


Epoch 3 Loss: 7.0847
Test Metrics: Precision=0.8645, Recall=0.8645, F1=0.8645

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 125, Split 4...


Map: 100%|██████████| 125/125 [00:00<00:00, 8698.99 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 4499.55 examples/s]
Map: 100%|██████████| 1098/1098 [00:00<00:00, 9209.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 16/16 [00:01<00:00, 12.55it/s, loss=0.723]


Epoch 1 Loss: 17.5394
Epoch 2/3


Training Epoch 2: 100%|██████████| 16/16 [00:01<00:00, 11.98it/s, loss=0.524]


Epoch 2 Loss: 8.6589
Epoch 3/3


Training Epoch 3: 100%|██████████| 16/16 [00:01<00:00, 11.39it/s, loss=0.408]


Epoch 3 Loss: 6.5649
Test Metrics: Precision=0.8595, Recall=0.8595, F1=0.8595

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 125, Split 5...


Map: 100%|██████████| 125/125 [00:00<00:00, 8419.19 examples/s]
Map: 100%|██████████| 25/25 [00:00<00:00, 5320.02 examples/s]
Map: 100%|██████████| 1098/1098 [00:00<00:00, 9535.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 16/16 [00:01<00:00, 11.34it/s, loss=0.781]


Epoch 1 Loss: 17.1791
Epoch 2/3


Training Epoch 2: 100%|██████████| 16/16 [00:01<00:00, 11.98it/s, loss=0.511]


Epoch 2 Loss: 9.9448
Epoch 3/3


Training Epoch 3: 100%|██████████| 16/16 [00:01<00:00, 11.46it/s, loss=0.54] 


Epoch 3 Loss: 7.8830
Test Metrics: Precision=0.8562, Recall=0.8562, F1=0.8562

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 130, Split 1...


Map: 100%|██████████| 130/130 [00:00<00:00, 8606.28 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 5092.08 examples/s]
Map: 100%|██████████| 1092/1092 [00:00<00:00, 9250.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.84it/s, loss=0.584]


Epoch 1 Loss: 16.4254
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 12.09it/s, loss=0.574]


Epoch 2 Loss: 8.6285
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 12.36it/s, loss=0.472]


Epoch 3 Loss: 6.6712
Test Metrics: Precision=0.8643, Recall=0.8643, F1=0.8643

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 130, Split 2...


Map: 100%|██████████| 130/130 [00:00<00:00, 7689.89 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 3492.34 examples/s]
Map: 100%|██████████| 1092/1092 [00:00<00:00, 9259.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.64it/s, loss=0.891]


Epoch 1 Loss: 17.9090
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.26it/s, loss=1.12] 


Epoch 2 Loss: 10.2790
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.47it/s, loss=0.855]


Epoch 3 Loss: 8.0965
Test Metrics: Precision=0.8616, Recall=0.8616, F1=0.8616

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 130, Split 3...


Map: 100%|██████████| 130/130 [00:00<00:00, 8633.67 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 4022.13 examples/s]
Map: 100%|██████████| 1092/1092 [00:00<00:00, 9327.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 12.06it/s, loss=0.348]


Epoch 1 Loss: 16.4083
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 12.02it/s, loss=0.347]


Epoch 2 Loss: 8.6167
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.88it/s, loss=0.34] 


Epoch 3 Loss: 7.1066
Test Metrics: Precision=0.8658, Recall=0.8658, F1=0.8658

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 130, Split 4...


Map: 100%|██████████| 130/130 [00:00<00:00, 8836.98 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 5576.11 examples/s]
Map: 100%|██████████| 1092/1092 [00:00<00:00, 9397.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 12.22it/s, loss=0.651]


Epoch 1 Loss: 16.5706
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 12.11it/s, loss=0.219]


Epoch 2 Loss: 8.4383
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.90it/s, loss=0.207]


Epoch 3 Loss: 6.6084
Test Metrics: Precision=0.8630, Recall=0.8630, F1=0.8630

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 130, Split 5...


Map: 100%|██████████| 130/130 [00:00<00:00, 8034.00 examples/s]
Map: 100%|██████████| 26/26 [00:00<00:00, 4845.03 examples/s]
Map: 100%|██████████| 1092/1092 [00:00<00:00, 9291.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.65it/s, loss=0.619]


Epoch 1 Loss: 17.1762
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.75it/s, loss=0.371]


Epoch 2 Loss: 9.2576
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.98it/s, loss=0.563]


Epoch 3 Loss: 7.7035
Test Metrics: Precision=0.8580, Recall=0.8580, F1=0.8580

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 135, Split 1...


Map: 100%|██████████| 135/135 [00:00<00:00, 8249.29 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 5543.40 examples/s]
Map: 100%|██████████| 1086/1086 [00:00<00:00, 9095.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.49it/s, loss=0.775]


Epoch 1 Loss: 15.7813
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.18it/s, loss=0.62] 


Epoch 2 Loss: 8.0752
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.70it/s, loss=0.453]


Epoch 3 Loss: 6.3650
Test Metrics: Precision=0.8652, Recall=0.8652, F1=0.8652

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 135, Split 2...


Map: 100%|██████████| 135/135 [00:00<00:00, 7519.07 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 4734.77 examples/s]
Map: 100%|██████████| 1086/1086 [00:00<00:00, 3527.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.64it/s, loss=0.836]


Epoch 1 Loss: 17.9382
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.31it/s, loss=0.429]


Epoch 2 Loss: 9.9329
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.04it/s, loss=0.509]


Epoch 3 Loss: 8.2288
Test Metrics: Precision=0.8552, Recall=0.8552, F1=0.8552

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 135, Split 3...


Map: 100%|██████████| 135/135 [00:00<00:00, 7750.22 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 5215.59 examples/s]
Map: 100%|██████████| 1086/1086 [00:00<00:00, 9074.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.37it/s, loss=0.552]


Epoch 1 Loss: 17.4056
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.62it/s, loss=0.57] 


Epoch 2 Loss: 9.6984
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 12.27it/s, loss=0.562]


Epoch 3 Loss: 7.5690
Test Metrics: Precision=0.8636, Recall=0.8636, F1=0.8636

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 135, Split 4...


Map: 100%|██████████| 135/135 [00:00<00:00, 8930.53 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 4940.29 examples/s]
Map: 100%|██████████| 1086/1086 [00:00<00:00, 9170.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.86it/s, loss=0.693]


Epoch 1 Loss: 17.2553
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 12.59it/s, loss=0.613]


Epoch 2 Loss: 8.4688
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.85it/s, loss=0.423]


Epoch 3 Loss: 6.6772
Test Metrics: Precision=0.8718, Recall=0.8718, F1=0.8718

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 135, Split 5...


Map: 100%|██████████| 135/135 [00:00<00:00, 8119.64 examples/s]
Map: 100%|██████████| 27/27 [00:00<00:00, 5326.48 examples/s]
Map: 100%|██████████| 1086/1086 [00:00<00:00, 9109.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 17/17 [00:01<00:00, 11.26it/s, loss=0.565]


Epoch 1 Loss: 16.6686
Epoch 2/3


Training Epoch 2: 100%|██████████| 17/17 [00:01<00:00, 11.96it/s, loss=0.5]  


Epoch 2 Loss: 8.6201
Epoch 3/3


Training Epoch 3: 100%|██████████| 17/17 [00:01<00:00, 11.77it/s, loss=0.25] 


Epoch 3 Loss: 7.1101
Test Metrics: Precision=0.8625, Recall=0.8625, F1=0.8625

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 1...


Map: 100%|██████████| 140/140 [00:00<00:00, 8347.11 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 4578.04 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9113.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.54it/s, loss=0.457]


Epoch 1 Loss: 17.6695
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.75it/s, loss=0.432]


Epoch 2 Loss: 9.0566
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 12.02it/s, loss=0.572]


Epoch 3 Loss: 7.1901
Test Metrics: Precision=0.8599, Recall=0.8599, F1=0.8599

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 2...


Map: 100%|██████████| 140/140 [00:00<00:00, 8448.11 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5369.94 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9418.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.15it/s, loss=0.502]


Epoch 1 Loss: 18.5174
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 12.50it/s, loss=0.217]


Epoch 2 Loss: 10.1583
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 12.62it/s, loss=0.372]


Epoch 3 Loss: 8.3429
Test Metrics: Precision=0.8656, Recall=0.8656, F1=0.8656

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 3...


Map: 100%|██████████| 140/140 [00:00<00:00, 8818.58 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5662.51 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9228.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.73it/s, loss=0.98] 


Epoch 1 Loss: 19.7486
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.84it/s, loss=0.351]


Epoch 2 Loss: 11.9743
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 12.06it/s, loss=0.473]


Epoch 3 Loss: 9.0261
Test Metrics: Precision=0.8523, Recall=0.8523, F1=0.8523

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 4...


Map: 100%|██████████| 140/140 [00:00<00:00, 9077.74 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5226.55 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9089.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.42it/s, loss=0.636]


Epoch 1 Loss: 18.2380
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.81it/s, loss=0.411]


Epoch 2 Loss: 8.2159
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 12.10it/s, loss=0.429]


Epoch 3 Loss: 6.7407
Test Metrics: Precision=0.8678, Recall=0.8678, F1=0.8678

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 5...


Map: 100%|██████████| 140/140 [00:00<00:00, 8371.03 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5499.70 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9333.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.59it/s, loss=0.62] 


Epoch 1 Loss: 17.7171
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.27it/s, loss=0.616]


Epoch 2 Loss: 9.6171
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.92it/s, loss=0.682]


Epoch 3 Loss: 7.7188
Test Metrics: Precision=0.8607, Recall=0.8607, F1=0.8607

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 145, Split 1...


Map: 100%|██████████| 145/145 [00:00<00:00, 8616.44 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 5630.46 examples/s]
Map: 100%|██████████| 1074/1074 [00:00<00:00, 9306.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.86it/s, loss=0.336]


Epoch 1 Loss: 18.2830
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.83it/s, loss=0.237]


Epoch 2 Loss: 9.8139
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.75it/s, loss=0.424]


Epoch 3 Loss: 7.9109
Test Metrics: Precision=0.8600, Recall=0.8600, F1=0.8600

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 145, Split 2...


Map: 100%|██████████| 145/145 [00:00<00:00, 8609.12 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 4943.90 examples/s]
Map: 100%|██████████| 1074/1074 [00:00<00:00, 9241.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.28it/s, loss=0.705]


Epoch 1 Loss: 18.4860
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.29it/s, loss=0.349]


Epoch 2 Loss: 9.8129
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 12.05it/s, loss=0.135]


Epoch 3 Loss: 7.9582
Test Metrics: Precision=0.8641, Recall=0.8641, F1=0.8641

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 145, Split 3...


Map: 100%|██████████| 145/145 [00:00<00:00, 8555.47 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 5771.52 examples/s]
Map: 100%|██████████| 1074/1074 [00:00<00:00, 9051.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.68it/s, loss=0.73] 


Epoch 1 Loss: 18.5342
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.36it/s, loss=0.262]


Epoch 2 Loss: 9.2702
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.99it/s, loss=0.573]


Epoch 3 Loss: 7.6369
Test Metrics: Precision=0.8678, Recall=0.8678, F1=0.8678

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 145, Split 4...


Map: 100%|██████████| 145/145 [00:00<00:00, 7809.92 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 5998.07 examples/s]
Map: 100%|██████████| 1074/1074 [00:00<00:00, 9141.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.91it/s, loss=0.437]


Epoch 1 Loss: 17.3542
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 12.40it/s, loss=0.569]


Epoch 2 Loss: 8.6738
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 12.22it/s, loss=0.261]


Epoch 3 Loss: 6.6287
Test Metrics: Precision=0.8708, Recall=0.8708, F1=0.8708

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 145, Split 5...


Map: 100%|██████████| 145/145 [00:00<00:00, 8059.56 examples/s]
Map: 100%|██████████| 29/29 [00:00<00:00, 4493.51 examples/s]
Map: 100%|██████████| 1074/1074 [00:00<00:00, 9034.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.82it/s, loss=0.464]


Epoch 1 Loss: 17.9679
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.52it/s, loss=0.208]


Epoch 2 Loss: 9.9105
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.89it/s, loss=0.447]


Epoch 3 Loss: 8.4780
Test Metrics: Precision=0.8647, Recall=0.8647, F1=0.8647

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 150, Split 1...


Map: 100%|██████████| 150/150 [00:00<00:00, 8501.16 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 5962.62 examples/s]
Map: 100%|██████████| 1068/1068 [00:00<00:00, 9113.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.58it/s, loss=0.52] 


Epoch 1 Loss: 18.5726
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.85it/s, loss=0.51] 


Epoch 2 Loss: 9.3800
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.87it/s, loss=0.377]


Epoch 3 Loss: 7.3234
Test Metrics: Precision=0.8675, Recall=0.8675, F1=0.8675

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 150, Split 2...


Map: 100%|██████████| 150/150 [00:00<00:00, 8165.31 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 5127.93 examples/s]
Map: 100%|██████████| 1068/1068 [00:00<00:00, 9301.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.01it/s, loss=0.778]


Epoch 1 Loss: 20.3239
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.79it/s, loss=0.562]


Epoch 2 Loss: 12.0711
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.59it/s, loss=0.582]


Epoch 3 Loss: 9.3402
Test Metrics: Precision=0.8539, Recall=0.8539, F1=0.8539

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 150, Split 3...


Map: 100%|██████████| 150/150 [00:00<00:00, 8593.83 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 4405.16 examples/s]
Map: 100%|██████████| 1068/1068 [00:00<00:00, 9408.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.50it/s, loss=0.446]


Epoch 1 Loss: 17.8933
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.90it/s, loss=0.363]


Epoch 2 Loss: 9.4944
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.79it/s, loss=0.278]


Epoch 3 Loss: 7.1711
Test Metrics: Precision=0.8735, Recall=0.8735, F1=0.8735

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 150, Split 4...


Map: 100%|██████████| 150/150 [00:00<00:00, 8705.13 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 5660.84 examples/s]
Map: 100%|██████████| 1068/1068 [00:00<00:00, 9121.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 11.85it/s, loss=0.578]


Epoch 1 Loss: 18.5347
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.83it/s, loss=0.5]  


Epoch 2 Loss: 8.9513
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 12.23it/s, loss=0.425]


Epoch 3 Loss: 6.9851
Test Metrics: Precision=0.8714, Recall=0.8714, F1=0.8714

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 150, Split 5...


Map: 100%|██████████| 150/150 [00:00<00:00, 8141.54 examples/s]
Map: 100%|██████████| 30/30 [00:00<00:00, 5672.83 examples/s]
Map: 100%|██████████| 1068/1068 [00:00<00:00, 9167.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 19/19 [00:01<00:00, 10.70it/s, loss=0.636]


Epoch 1 Loss: 20.4794
Epoch 2/3


Training Epoch 2: 100%|██████████| 19/19 [00:01<00:00, 11.58it/s, loss=0.553]


Epoch 2 Loss: 11.2626
Epoch 3/3


Training Epoch 3: 100%|██████████| 19/19 [00:01<00:00, 11.37it/s, loss=0.386]


Epoch 3 Loss: 8.7440
Test Metrics: Precision=0.8654, Recall=0.8654, F1=0.8654

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 155, Split 1...


Map: 100%|██████████| 155/155 [00:00<00:00, 8600.57 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 5432.13 examples/s]
Map: 100%|██████████| 1062/1062 [00:00<00:00, 9327.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.86it/s, loss=0.311]


Epoch 1 Loss: 19.4676
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.07it/s, loss=0.367]


Epoch 2 Loss: 9.7669
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.70it/s, loss=0.638]


Epoch 3 Loss: 7.9076
Test Metrics: Precision=0.8648, Recall=0.8648, F1=0.8648

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 155, Split 2...


Map: 100%|██████████| 155/155 [00:00<00:00, 8513.62 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 4783.09 examples/s]
Map: 100%|██████████| 1062/1062 [00:00<00:00, 9367.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.99it/s, loss=0.396]


Epoch 1 Loss: 18.3331
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.90it/s, loss=0.233]


Epoch 2 Loss: 9.2449
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 12.24it/s, loss=0.677]


Epoch 3 Loss: 7.5748
Test Metrics: Precision=0.8723, Recall=0.8723, F1=0.8723

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 155, Split 3...


Map: 100%|██████████| 155/155 [00:00<00:00, 8516.52 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 5834.57 examples/s]
Map: 100%|██████████| 1062/1062 [00:00<00:00, 9229.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 12.39it/s, loss=0.896]


Epoch 1 Loss: 19.1724
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.04it/s, loss=0.57] 


Epoch 2 Loss: 9.7015
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 12.26it/s, loss=0.167]


Epoch 3 Loss: 7.4188
Test Metrics: Precision=0.8761, Recall=0.8761, F1=0.8761

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 155, Split 4...


Map: 100%|██████████| 155/155 [00:00<00:00, 8780.50 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 5242.25 examples/s]
Map: 100%|██████████| 1062/1062 [00:00<00:00, 9138.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.83it/s, loss=0.455]


Epoch 1 Loss: 18.2538
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.43it/s, loss=0.496]


Epoch 2 Loss: 9.0561
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 12.84it/s, loss=0.283]


Epoch 3 Loss: 7.0108
Test Metrics: Precision=0.8720, Recall=0.8720, F1=0.8720

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 155, Split 5...


Map: 100%|██████████| 155/155 [00:00<00:00, 8306.93 examples/s]
Map: 100%|██████████| 31/31 [00:00<00:00, 6014.87 examples/s]
Map: 100%|██████████| 1062/1062 [00:00<00:00, 9233.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.00it/s, loss=0.622]


Epoch 1 Loss: 20.5756
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.62it/s, loss=0.364]


Epoch 2 Loss: 11.0003
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.65it/s, loss=0.33] 


Epoch 3 Loss: 8.9181
Test Metrics: Precision=0.8635, Recall=0.8635, F1=0.8635

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 1...


Map: 100%|██████████| 160/160 [00:00<00:00, 8495.12 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5014.86 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9382.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.48it/s, loss=0.521]


Epoch 1 Loss: 19.8044
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.51it/s, loss=0.302]


Epoch 2 Loss: 9.3820
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.40it/s, loss=0.357]


Epoch 3 Loss: 7.4423
Test Metrics: Precision=0.8670, Recall=0.8670, F1=0.8670

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 2...


Map: 100%|██████████| 160/160 [00:00<00:00, 8049.71 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5270.05 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9441.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 10.77it/s, loss=1.04] 


Epoch 1 Loss: 19.8333
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 10.66it/s, loss=0.509]


Epoch 2 Loss: 10.5078
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.34it/s, loss=0.403]


Epoch 3 Loss: 8.5901
Test Metrics: Precision=0.8705, Recall=0.8705, F1=0.8705

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 3...


Map: 100%|██████████| 160/160 [00:00<00:00, 8706.17 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5640.11 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9285.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.64it/s, loss=0.674]


Epoch 1 Loss: 20.9450
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.26it/s, loss=0.38] 


Epoch 2 Loss: 10.4474
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.86it/s, loss=0.322]


Epoch 3 Loss: 7.9414
Test Metrics: Precision=0.8706, Recall=0.8706, F1=0.8706

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 4...


Map: 100%|██████████| 160/160 [00:00<00:00, 9366.60 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5137.52 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9359.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.65it/s, loss=0.6]  


Epoch 1 Loss: 20.7225
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.92it/s, loss=0.448]


Epoch 2 Loss: 9.7879
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.91it/s, loss=0.453]


Epoch 3 Loss: 7.6183
Test Metrics: Precision=0.8651, Recall=0.8651, F1=0.8651

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 5...


Map: 100%|██████████| 160/160 [00:00<00:00, 8600.06 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5754.24 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9397.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 10.93it/s, loss=0.667]


Epoch 1 Loss: 18.5386
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 10.86it/s, loss=0.402]


Epoch 2 Loss: 10.1813
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.39it/s, loss=0.375]


Epoch 3 Loss: 8.2117
Test Metrics: Precision=0.8693, Recall=0.8693, F1=0.8693

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 165, Split 1...


Map: 100%|██████████| 165/165 [00:00<00:00, 8719.53 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 5618.28 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 9396.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 21/21 [00:01<00:00, 11.75it/s, loss=0.538]


Epoch 1 Loss: 19.9680
Epoch 2/3


Training Epoch 2: 100%|██████████| 21/21 [00:01<00:00, 12.07it/s, loss=0.408]


Epoch 2 Loss: 9.6191
Epoch 3/3


Training Epoch 3: 100%|██████████| 21/21 [00:01<00:00, 11.55it/s, loss=0.542]


Epoch 3 Loss: 7.9277
Test Metrics: Precision=0.8715, Recall=0.8715, F1=0.8715

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 165, Split 2...


Map: 100%|██████████| 165/165 [00:00<00:00, 9052.81 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 5604.86 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 9460.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 21/21 [00:01<00:00, 11.08it/s, loss=0.638]


Epoch 1 Loss: 21.3868
Epoch 2/3


Training Epoch 2: 100%|██████████| 21/21 [00:01<00:00, 11.91it/s, loss=0.501]


Epoch 2 Loss: 11.5065
Epoch 3/3


Training Epoch 3: 100%|██████████| 21/21 [00:01<00:00, 11.12it/s, loss=0.247]


Epoch 3 Loss: 8.8198
Test Metrics: Precision=0.8676, Recall=0.8676, F1=0.8676

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 165, Split 3...


Map: 100%|██████████| 165/165 [00:00<00:00, 8620.58 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 6238.71 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 3448.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 21/21 [00:01<00:00, 11.72it/s, loss=0.475]


Epoch 1 Loss: 19.1034
Epoch 2/3


Training Epoch 2: 100%|██████████| 21/21 [00:01<00:00, 12.06it/s, loss=0.386]


Epoch 2 Loss: 9.6995
Epoch 3/3


Training Epoch 3: 100%|██████████| 21/21 [00:01<00:00, 12.02it/s, loss=0.514]


Epoch 3 Loss: 7.8548
Test Metrics: Precision=0.8724, Recall=0.8724, F1=0.8724

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 165, Split 4...


Map: 100%|██████████| 165/165 [00:00<00:00, 8733.94 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 6271.79 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 9164.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 21/21 [00:01<00:00, 11.55it/s, loss=0.573]


Epoch 1 Loss: 20.0593
Epoch 2/3


Training Epoch 2: 100%|██████████| 21/21 [00:01<00:00, 12.51it/s, loss=0.559]


Epoch 2 Loss: 9.4679
Epoch 3/3


Training Epoch 3: 100%|██████████| 21/21 [00:01<00:00, 12.10it/s, loss=0.38] 


Epoch 3 Loss: 7.8404
Test Metrics: Precision=0.8709, Recall=0.8709, F1=0.8709

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 165, Split 5...


Map: 100%|██████████| 165/165 [00:00<00:00, 8654.21 examples/s]
Map: 100%|██████████| 33/33 [00:00<00:00, 6845.64 examples/s]
Map: 100%|██████████| 1050/1050 [00:00<00:00, 9306.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 21/21 [00:01<00:00, 11.38it/s, loss=0.471]


Epoch 1 Loss: 21.0280
Epoch 2/3


Training Epoch 2: 100%|██████████| 21/21 [00:01<00:00, 11.31it/s, loss=0.323]


Epoch 2 Loss: 11.1463
Epoch 3/3


Training Epoch 3: 100%|██████████| 21/21 [00:01<00:00, 11.32it/s, loss=0.372]


Epoch 3 Loss: 8.9694
Test Metrics: Precision=0.8664, Recall=0.8664, F1=0.8664

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 170, Split 1...


Map: 100%|██████████| 170/170 [00:00<00:00, 8626.18 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 5583.43 examples/s]
Map: 100%|██████████| 1044/1044 [00:00<00:00, 9505.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 12.02it/s, loss=0.703]


Epoch 1 Loss: 20.3096
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 11.85it/s, loss=0.241]


Epoch 2 Loss: 10.1033
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.36it/s, loss=0.445]


Epoch 3 Loss: 8.3205
Test Metrics: Precision=0.8713, Recall=0.8713, F1=0.8713

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 170, Split 2...


Map: 100%|██████████| 170/170 [00:00<00:00, 8765.20 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 5799.60 examples/s]
Map: 100%|██████████| 1044/1044 [00:00<00:00, 9260.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 11.41it/s, loss=0.89] 


Epoch 1 Loss: 20.7261
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 11.27it/s, loss=0.897]


Epoch 2 Loss: 11.5093
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.45it/s, loss=1.07] 


Epoch 3 Loss: 9.4936
Test Metrics: Precision=0.8699, Recall=0.8699, F1=0.8699

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 170, Split 3...


Map: 100%|██████████| 170/170 [00:00<00:00, 8819.74 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 6636.87 examples/s]
Map: 100%|██████████| 1044/1044 [00:00<00:00, 9128.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 12.39it/s, loss=0.627]


Epoch 1 Loss: 21.2471
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 12.07it/s, loss=0.366]


Epoch 2 Loss: 9.9495
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 12.19it/s, loss=0.146]


Epoch 3 Loss: 7.7685
Test Metrics: Precision=0.8776, Recall=0.8776, F1=0.8776

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 170, Split 4...


Map: 100%|██████████| 170/170 [00:00<00:00, 9168.70 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 5827.56 examples/s]
Map: 100%|██████████| 1044/1044 [00:00<00:00, 9159.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 12.20it/s, loss=0.444]


Epoch 1 Loss: 20.1998
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 12.08it/s, loss=0.484]


Epoch 2 Loss: 10.0207
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 12.19it/s, loss=0.433]


Epoch 3 Loss: 7.8426
Test Metrics: Precision=0.8760, Recall=0.8760, F1=0.8760

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 170, Split 5...


Map: 100%|██████████| 170/170 [00:00<00:00, 8441.44 examples/s]
Map: 100%|██████████| 34/34 [00:00<00:00, 6267.86 examples/s]
Map: 100%|██████████| 1044/1044 [00:00<00:00, 9207.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 11.20it/s, loss=0.607]


Epoch 1 Loss: 21.0509
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 11.31it/s, loss=0.366]


Epoch 2 Loss: 11.6275
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.89it/s, loss=0.16] 


Epoch 3 Loss: 9.1570
Test Metrics: Precision=0.8680, Recall=0.8680, F1=0.8680

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 175, Split 1...


Map: 100%|██████████| 175/175 [00:00<00:00, 8339.52 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 5247.75 examples/s]
Map: 100%|██████████| 1038/1038 [00:00<00:00, 9148.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 11.38it/s, loss=0.538]


Epoch 1 Loss: 21.3963
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 11.55it/s, loss=0.468]


Epoch 2 Loss: 11.1359
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.51it/s, loss=0.399]


Epoch 3 Loss: 8.4995
Test Metrics: Precision=0.8701, Recall=0.8701, F1=0.8701

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 175, Split 2...


Map: 100%|██████████| 175/175 [00:00<00:00, 8876.99 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 5450.79 examples/s]
Map: 100%|██████████| 1038/1038 [00:00<00:00, 9266.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:02<00:00, 10.81it/s, loss=0.584]


Epoch 1 Loss: 22.1444
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:02<00:00, 10.74it/s, loss=0.459]


Epoch 2 Loss: 12.1454
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.17it/s, loss=0.333]


Epoch 3 Loss: 9.6354
Test Metrics: Precision=0.8647, Recall=0.8647, F1=0.8647

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 175, Split 3...


Map: 100%|██████████| 175/175 [00:00<00:00, 8733.14 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 7272.76 examples/s]
Map: 100%|██████████| 1038/1038 [00:00<00:00, 9215.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 11.91it/s, loss=0.463]


Epoch 1 Loss: 20.2171
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 12.34it/s, loss=0.475]


Epoch 2 Loss: 9.6983
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.99it/s, loss=0.41] 


Epoch 3 Loss: 7.6086
Test Metrics: Precision=0.8781, Recall=0.8781, F1=0.8781

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 175, Split 4...


Map: 100%|██████████| 175/175 [00:00<00:00, 9219.29 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 6431.85 examples/s]
Map: 100%|██████████| 1038/1038 [00:00<00:00, 9225.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:01<00:00, 12.09it/s, loss=0.549]


Epoch 1 Loss: 18.3281
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 12.19it/s, loss=0.438]


Epoch 2 Loss: 9.3398
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 12.00it/s, loss=0.3]  


Epoch 3 Loss: 7.3002
Test Metrics: Precision=0.8788, Recall=0.8788, F1=0.8788

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 175, Split 5...


Map: 100%|██████████| 175/175 [00:00<00:00, 9003.85 examples/s]
Map: 100%|██████████| 35/35 [00:00<00:00, 6568.85 examples/s]
Map: 100%|██████████| 1038/1038 [00:00<00:00, 9296.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 22/22 [00:02<00:00, 10.99it/s, loss=0.592]


Epoch 1 Loss: 20.7117
Epoch 2/3


Training Epoch 2: 100%|██████████| 22/22 [00:01<00:00, 11.36it/s, loss=0.362]


Epoch 2 Loss: 10.7604
Epoch 3/3


Training Epoch 3: 100%|██████████| 22/22 [00:01<00:00, 11.52it/s, loss=0.209]


Epoch 3 Loss: 8.5432
Test Metrics: Precision=0.8701, Recall=0.8701, F1=0.8701

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 1...


Map: 100%|██████████| 180/180 [00:00<00:00, 8372.14 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6066.98 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 3488.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.05it/s, loss=0.55] 


Epoch 1 Loss: 21.5385
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.08it/s, loss=0.221]


Epoch 2 Loss: 11.1665
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 11.63it/s, loss=0.258]


Epoch 3 Loss: 8.7632
Test Metrics: Precision=0.8706, Recall=0.8706, F1=0.8706

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 2...


Map: 100%|██████████| 180/180 [00:00<00:00, 8820.21 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5953.82 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9346.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.47it/s, loss=0.617]


Epoch 1 Loss: 21.3993
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.32it/s, loss=0.319]


Epoch 2 Loss: 10.9364
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 11.61it/s, loss=0.778]


Epoch 3 Loss: 9.0361
Test Metrics: Precision=0.8786, Recall=0.8786, F1=0.8786

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 3...


Map: 100%|██████████| 180/180 [00:00<00:00, 8831.66 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6263.01 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9253.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 11.51it/s, loss=0.596]


Epoch 1 Loss: 21.8470
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 11.59it/s, loss=0.498]


Epoch 2 Loss: 11.0827
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 12.11it/s, loss=0.512]


Epoch 3 Loss: 8.9816
Test Metrics: Precision=0.8701, Recall=0.8701, F1=0.8701

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 4...


Map: 100%|██████████| 180/180 [00:00<00:00, 8545.08 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5671.81 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9237.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 12.13it/s, loss=0.677]


Epoch 1 Loss: 21.8591
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 12.23it/s, loss=0.458]


Epoch 2 Loss: 10.8475
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 12.06it/s, loss=0.516]


Epoch 3 Loss: 8.7221
Test Metrics: Precision=0.8747, Recall=0.8747, F1=0.8747

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 5...


Map: 100%|██████████| 180/180 [00:00<00:00, 8905.84 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6596.26 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9038.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.18it/s, loss=0.538]


Epoch 1 Loss: 21.7629
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.46it/s, loss=0.737]


Epoch 2 Loss: 11.5629
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 11.27it/s, loss=0.319]


Epoch 3 Loss: 9.5687
Test Metrics: Precision=0.8634, Recall=0.8634, F1=0.8634

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 185, Split 1...


Map: 100%|██████████| 185/185 [00:00<00:00, 8345.21 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 5226.99 examples/s]
Map: 100%|██████████| 1026/1026 [00:00<00:00, 9368.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.40it/s, loss=0.245]


Epoch 1 Loss: 20.1677
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 11.78it/s, loss=0.393]


Epoch 2 Loss: 10.2721
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.18it/s, loss=0.238]


Epoch 3 Loss: 8.1950
Test Metrics: Precision=0.8721, Recall=0.8721, F1=0.8721

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 185, Split 2...


Map: 100%|██████████| 185/185 [00:00<00:00, 9083.36 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 5818.00 examples/s]
Map: 100%|██████████| 1026/1026 [00:00<00:00, 9231.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:01<00:00, 12.17it/s, loss=0.982]


Epoch 1 Loss: 22.6861
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 11.62it/s, loss=0.215]


Epoch 2 Loss: 11.3511
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.30it/s, loss=0.3]  


Epoch 3 Loss: 9.7488
Test Metrics: Precision=0.8672, Recall=0.8672, F1=0.8672

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 185, Split 3...


Map: 100%|██████████| 185/185 [00:00<00:00, 8749.66 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 6222.00 examples/s]
Map: 100%|██████████| 1026/1026 [00:00<00:00, 9403.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.46it/s, loss=0.664]


Epoch 1 Loss: 20.9151
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 11.67it/s, loss=0.262]


Epoch 2 Loss: 10.4860
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.45it/s, loss=0.188]


Epoch 3 Loss: 8.7042
Test Metrics: Precision=0.8735, Recall=0.8735, F1=0.8735

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 185, Split 4...


Map: 100%|██████████| 185/185 [00:00<00:00, 9158.73 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 6210.30 examples/s]
Map: 100%|██████████| 1026/1026 [00:00<00:00, 9236.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:01<00:00, 12.05it/s, loss=0.337]


Epoch 1 Loss: 18.7961
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:01<00:00, 12.51it/s, loss=0.0702]


Epoch 2 Loss: 9.0038
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.93it/s, loss=0.147]


Epoch 3 Loss: 7.2119
Test Metrics: Precision=0.8840, Recall=0.8840, F1=0.8840

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 185, Split 5...


Map: 100%|██████████| 185/185 [00:00<00:00, 7345.89 examples/s]
Map: 100%|██████████| 37/37 [00:00<00:00, 7115.18 examples/s]
Map: 100%|██████████| 1026/1026 [00:00<00:00, 8738.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.51it/s, loss=0.233]


Epoch 1 Loss: 23.0750
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:01<00:00, 12.14it/s, loss=0.488]


Epoch 2 Loss: 12.3355
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.77it/s, loss=0.223]


Epoch 3 Loss: 9.9390
Test Metrics: Precision=0.8682, Recall=0.8682, F1=0.8682

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 190, Split 1...


Map: 100%|██████████| 190/190 [00:00<00:00, 8751.66 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 6335.05 examples/s]
Map: 100%|██████████| 1020/1020 [00:00<00:00, 9344.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 10.92it/s, loss=0.782]


Epoch 1 Loss: 20.7007
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 11.44it/s, loss=0.556]


Epoch 2 Loss: 10.7993
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.42it/s, loss=0.639]


Epoch 3 Loss: 9.0953
Test Metrics: Precision=0.8722, Recall=0.8722, F1=0.8722

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 190, Split 2...


Map: 100%|██████████| 190/190 [00:00<00:00, 8978.65 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 5454.23 examples/s]
Map: 100%|██████████| 1020/1020 [00:00<00:00, 9184.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.01it/s, loss=0.572]


Epoch 1 Loss: 20.5743
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 11.09it/s, loss=0.23] 


Epoch 2 Loss: 10.9654
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.27it/s, loss=0.447]


Epoch 3 Loss: 9.9295
Test Metrics: Precision=0.8677, Recall=0.8677, F1=0.8677

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 190, Split 3...


Map: 100%|██████████| 190/190 [00:00<00:00, 8250.52 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 4930.51 examples/s]
Map: 100%|██████████| 1020/1020 [00:00<00:00, 9395.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.03it/s, loss=0.36] 


Epoch 1 Loss: 22.1452
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 10.78it/s, loss=0.361]


Epoch 2 Loss: 10.8515
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:02<00:00, 11.01it/s, loss=0.502]


Epoch 3 Loss: 9.0438
Test Metrics: Precision=0.8760, Recall=0.8760, F1=0.8760

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 190, Split 4...


Map: 100%|██████████| 190/190 [00:00<00:00, 9313.05 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 6103.14 examples/s]
Map: 100%|██████████| 1020/1020 [00:00<00:00, 9183.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.38it/s, loss=0.632]


Epoch 1 Loss: 19.2296
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:01<00:00, 12.21it/s, loss=0.447]


Epoch 2 Loss: 9.5928
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:01<00:00, 12.45it/s, loss=0.281]


Epoch 3 Loss: 7.5216
Test Metrics: Precision=0.8860, Recall=0.8860, F1=0.8860

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 190, Split 5...


Map: 100%|██████████| 190/190 [00:00<00:00, 8756.09 examples/s]
Map: 100%|██████████| 38/38 [00:00<00:00, 5954.70 examples/s]
Map: 100%|██████████| 1020/1020 [00:00<00:00, 9386.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 24/24 [00:02<00:00, 11.61it/s, loss=0.409]


Epoch 1 Loss: 21.0835
Epoch 2/3


Training Epoch 2: 100%|██████████| 24/24 [00:02<00:00, 11.07it/s, loss=0.543]


Epoch 2 Loss: 10.5271
Epoch 3/3


Training Epoch 3: 100%|██████████| 24/24 [00:01<00:00, 12.39it/s, loss=0.295]


Epoch 3 Loss: 8.7464
Test Metrics: Precision=0.8766, Recall=0.8766, F1=0.8766

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 195, Split 1...


Map: 100%|██████████| 195/195 [00:00<00:00, 8038.86 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 6768.65 examples/s]
Map: 100%|██████████| 1014/1014 [00:00<00:00, 8798.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.32it/s, loss=0.388]


Epoch 1 Loss: 21.6554
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.36it/s, loss=0.429]


Epoch 2 Loss: 11.2170
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.70it/s, loss=0.462]


Epoch 3 Loss: 8.9157
Test Metrics: Precision=0.8723, Recall=0.8723, F1=0.8723

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 195, Split 2...


Map: 100%|██████████| 195/195 [00:00<00:00, 9004.02 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 5976.10 examples/s]
Map: 100%|██████████| 1014/1014 [00:00<00:00, 9317.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.20it/s, loss=0.639]


Epoch 1 Loss: 21.9798
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.26it/s, loss=0.256]


Epoch 2 Loss: 11.2172
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 10.96it/s, loss=0.4]  


Epoch 3 Loss: 9.3669
Test Metrics: Precision=0.8790, Recall=0.8790, F1=0.8790

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 195, Split 3...


Map: 100%|██████████| 195/195 [00:00<00:00, 8138.53 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 6654.65 examples/s]
Map: 100%|██████████| 1014/1014 [00:00<00:00, 9378.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.07it/s, loss=0.615]


Epoch 1 Loss: 20.6574
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.03it/s, loss=0.707]


Epoch 2 Loss: 11.0716
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.63it/s, loss=0.273]


Epoch 3 Loss: 8.6233
Test Metrics: Precision=0.8800, Recall=0.8800, F1=0.8800

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 195, Split 4...


Map: 100%|██████████| 195/195 [00:00<00:00, 9404.05 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 6471.15 examples/s]
Map: 100%|██████████| 1014/1014 [00:00<00:00, 9270.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 12.15it/s, loss=0.338]


Epoch 1 Loss: 21.5149
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 12.06it/s, loss=0.42] 


Epoch 2 Loss: 9.9600
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 12.33it/s, loss=0.386]


Epoch 3 Loss: 8.0372
Test Metrics: Precision=0.8829, Recall=0.8829, F1=0.8829

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 195, Split 5...


Map: 100%|██████████| 195/195 [00:00<00:00, 8946.21 examples/s]
Map: 100%|██████████| 39/39 [00:00<00:00, 6362.42 examples/s]
Map: 100%|██████████| 1014/1014 [00:00<00:00, 9339.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.53it/s, loss=0.802]


Epoch 1 Loss: 23.2946
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.82it/s, loss=0.571]


Epoch 2 Loss: 12.2581
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.38it/s, loss=0.242]


Epoch 3 Loss: 9.6552
Test Metrics: Precision=0.8668, Recall=0.8668, F1=0.8668

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 1...


Map: 100%|██████████| 200/200 [00:00<00:00, 8803.52 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6488.71 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9307.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.98it/s, loss=0.471]


Epoch 1 Loss: 21.0015
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.14it/s, loss=0.296]


Epoch 2 Loss: 10.3547
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.42it/s, loss=0.28] 


Epoch 3 Loss: 8.2367
Test Metrics: Precision=0.8795, Recall=0.8795, F1=0.8795

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 2...


Map: 100%|██████████| 200/200 [00:00<00:00, 8425.09 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6350.44 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9352.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.32it/s, loss=0.473]


Epoch 1 Loss: 22.2443
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.23it/s, loss=0.454]


Epoch 2 Loss: 11.1470
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 10.87it/s, loss=0.48] 


Epoch 3 Loss: 9.1900
Test Metrics: Precision=0.8768, Recall=0.8768, F1=0.8768

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 3...


Map: 100%|██████████| 200/200 [00:00<00:00, 8069.38 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 5804.46 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9452.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.05it/s, loss=0.411]


Epoch 1 Loss: 20.7593
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.31it/s, loss=0.289]


Epoch 2 Loss: 10.4382
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.43it/s, loss=0.226]


Epoch 3 Loss: 8.0346
Test Metrics: Precision=0.8857, Recall=0.8857, F1=0.8857

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 4...


Map: 100%|██████████| 200/200 [00:00<00:00, 9070.04 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6202.53 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9131.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.66it/s, loss=0.5]  


Epoch 1 Loss: 21.4077
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.67it/s, loss=0.429]


Epoch 2 Loss: 10.0991
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 12.13it/s, loss=0.411]


Epoch 3 Loss: 8.0070
Test Metrics: Precision=0.8827, Recall=0.8827, F1=0.8827

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 5...


Map: 100%|██████████| 200/200 [00:00<00:00, 8018.48 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6412.57 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9164.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.94it/s, loss=0.618]


Epoch 1 Loss: 23.8553
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.15it/s, loss=0.36] 


Epoch 2 Loss: 13.0316
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 10.97it/s, loss=0.425]


Epoch 3 Loss: 10.2017
Test Metrics: Precision=0.8765, Recall=0.8765, F1=0.8765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 205, Split 1...


Map: 100%|██████████| 205/205 [00:00<00:00, 8827.30 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 7716.69 examples/s]
Map: 100%|██████████| 1002/1002 [00:00<00:00, 9344.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 26/26 [00:02<00:00, 11.04it/s, loss=0.541]


Epoch 1 Loss: 21.9974
Epoch 2/3


Training Epoch 2: 100%|██████████| 26/26 [00:02<00:00, 11.68it/s, loss=0.283]


Epoch 2 Loss: 10.8046
Epoch 3/3


Training Epoch 3: 100%|██████████| 26/26 [00:02<00:00, 11.84it/s, loss=0.306]


Epoch 3 Loss: 8.9093
Test Metrics: Precision=0.8807, Recall=0.8807, F1=0.8807

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 205, Split 2...


Map: 100%|██████████| 205/205 [00:00<00:00, 9013.11 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 6141.22 examples/s]
Map: 100%|██████████| 1002/1002 [00:00<00:00, 9389.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 26/26 [00:02<00:00, 11.03it/s, loss=0.372]


Epoch 1 Loss: 22.2351
Epoch 2/3


Training Epoch 2: 100%|██████████| 26/26 [00:02<00:00, 11.43it/s, loss=0.392]


Epoch 2 Loss: 12.1584
Epoch 3/3


Training Epoch 3: 100%|██████████| 26/26 [00:02<00:00, 10.89it/s, loss=0.233]


Epoch 3 Loss: 9.6936
Test Metrics: Precision=0.8777, Recall=0.8777, F1=0.8777

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 205, Split 3...


Map: 100%|██████████| 205/205 [00:00<00:00, 8521.04 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 6172.30 examples/s]
Map: 100%|██████████| 1002/1002 [00:00<00:00, 9332.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 26/26 [00:02<00:00, 11.01it/s, loss=0.608]


Epoch 1 Loss: 23.2804
Epoch 2/3


Training Epoch 2: 100%|██████████| 26/26 [00:02<00:00, 11.24it/s, loss=0.367]


Epoch 2 Loss: 11.0616
Epoch 3/3


Training Epoch 3: 100%|██████████| 26/26 [00:02<00:00, 11.19it/s, loss=0.318]


Epoch 3 Loss: 8.7136
Test Metrics: Precision=0.8815, Recall=0.8815, F1=0.8815

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 205, Split 4...


Map: 100%|██████████| 205/205 [00:00<00:00, 9048.96 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 4859.05 examples/s]
Map: 100%|██████████| 1002/1002 [00:00<00:00, 9344.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 26/26 [00:02<00:00, 11.75it/s, loss=0.559]


Epoch 1 Loss: 22.2320
Epoch 2/3


Training Epoch 2: 100%|██████████| 26/26 [00:02<00:00, 12.41it/s, loss=0.348]


Epoch 2 Loss: 10.5792
Epoch 3/3


Training Epoch 3: 100%|██████████| 26/26 [00:02<00:00, 11.91it/s, loss=0.225]


Epoch 3 Loss: 8.2418
Test Metrics: Precision=0.8832, Recall=0.8832, F1=0.8832

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 205, Split 5...


Map: 100%|██████████| 205/205 [00:00<00:00, 9127.34 examples/s]
Map: 100%|██████████| 41/41 [00:00<00:00, 7060.54 examples/s]
Map: 100%|██████████| 1002/1002 [00:00<00:00, 9371.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 26/26 [00:02<00:00, 11.36it/s, loss=0.239]


Epoch 1 Loss: 22.4623
Epoch 2/3


Training Epoch 2: 100%|██████████| 26/26 [00:02<00:00, 11.25it/s, loss=0.203]


Epoch 2 Loss: 11.6255
Epoch 3/3


Training Epoch 3: 100%|██████████| 26/26 [00:02<00:00, 11.48it/s, loss=0.265]


Epoch 3 Loss: 9.4053
Test Metrics: Precision=0.8786, Recall=0.8786, F1=0.8786

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 210, Split 1...


Map: 100%|██████████| 210/210 [00:00<00:00, 8766.66 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6252.82 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9559.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.61it/s, loss=0.234]


Epoch 1 Loss: 22.0408
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.48it/s, loss=0.799]


Epoch 2 Loss: 11.5531
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.65it/s, loss=0.229]


Epoch 3 Loss: 9.0719
Test Metrics: Precision=0.8803, Recall=0.8803, F1=0.8803

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 210, Split 2...


Map: 100%|██████████| 210/210 [00:00<00:00, 8542.29 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6428.99 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9420.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.46it/s, loss=0.661]


Epoch 1 Loss: 25.1703
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.66it/s, loss=0.84] 


Epoch 2 Loss: 13.7922
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.26it/s, loss=1.1]  


Epoch 3 Loss: 11.6166
Test Metrics: Precision=0.8751, Recall=0.8751, F1=0.8751

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 210, Split 3...


Map: 100%|██████████| 210/210 [00:00<00:00, 8141.34 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 7390.84 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9467.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.50it/s, loss=0.526]


Epoch 1 Loss: 23.4018
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 10.95it/s, loss=0.234]


Epoch 2 Loss: 11.2690
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.34it/s, loss=0.185]


Epoch 3 Loss: 8.9129
Test Metrics: Precision=0.8809, Recall=0.8809, F1=0.8809

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 210, Split 4...


Map: 100%|██████████| 210/210 [00:00<00:00, 9045.67 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 5979.46 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9204.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 12.16it/s, loss=0.689]


Epoch 1 Loss: 21.0314
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 12.29it/s, loss=0.343]


Epoch 2 Loss: 10.4123
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 12.12it/s, loss=0.554]


Epoch 3 Loss: 8.5233
Test Metrics: Precision=0.8825, Recall=0.8825, F1=0.8825

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 210, Split 5...


Map: 100%|██████████| 210/210 [00:00<00:00, 8570.97 examples/s]
Map: 100%|██████████| 42/42 [00:00<00:00, 6351.80 examples/s]
Map: 100%|██████████| 996/996 [00:00<00:00, 9259.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.68it/s, loss=0.351]


Epoch 1 Loss: 23.7756
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.26it/s, loss=0.291]


Epoch 2 Loss: 12.7560
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.62it/s, loss=0.195]


Epoch 3 Loss: 9.9987
Test Metrics: Precision=0.8783, Recall=0.8783, F1=0.8783

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 215, Split 1...


Map: 100%|██████████| 215/215 [00:00<00:00, 8984.15 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 7810.96 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9361.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.16it/s, loss=0.534]


Epoch 1 Loss: 22.7632
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.50it/s, loss=0.418]


Epoch 2 Loss: 12.0670
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.68it/s, loss=0.441]


Epoch 3 Loss: 9.3670
Test Metrics: Precision=0.8777, Recall=0.8777, F1=0.8777

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 215, Split 2...


Map: 100%|██████████| 215/215 [00:00<00:00, 9046.43 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 7067.21 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9507.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.35it/s, loss=0.791]


Epoch 1 Loss: 23.0051
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 10.85it/s, loss=0.417]


Epoch 2 Loss: 13.3994
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.43it/s, loss=0.245]


Epoch 3 Loss: 10.4510
Test Metrics: Precision=0.8732, Recall=0.8732, F1=0.8732

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 215, Split 3...


Map: 100%|██████████| 215/215 [00:00<00:00, 8273.32 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 5308.46 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9439.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.22it/s, loss=0.364]


Epoch 1 Loss: 22.7196
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.12it/s, loss=0.248]


Epoch 2 Loss: 10.9663
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.24it/s, loss=0.219]


Epoch 3 Loss: 8.5102
Test Metrics: Precision=0.8857, Recall=0.8857, F1=0.8857

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 215, Split 4...


Map: 100%|██████████| 215/215 [00:00<00:00, 9498.47 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 6582.78 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9164.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 12.03it/s, loss=0.337]


Epoch 1 Loss: 21.2964
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.85it/s, loss=0.382]


Epoch 2 Loss: 10.1195
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 12.04it/s, loss=0.193]


Epoch 3 Loss: 7.8308
Test Metrics: Precision=0.8875, Recall=0.8875, F1=0.8875

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 215, Split 5...


Map: 100%|██████████| 215/215 [00:00<00:00, 8892.63 examples/s]
Map: 100%|██████████| 43/43 [00:00<00:00, 4965.59 examples/s]
Map: 100%|██████████| 990/990 [00:00<00:00, 9390.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 27/27 [00:02<00:00, 11.52it/s, loss=0.435]


Epoch 1 Loss: 22.9267
Epoch 2/3


Training Epoch 2: 100%|██████████| 27/27 [00:02<00:00, 11.55it/s, loss=0.293]


Epoch 2 Loss: 11.9711
Epoch 3/3


Training Epoch 3: 100%|██████████| 27/27 [00:02<00:00, 11.68it/s, loss=0.268]


Epoch 3 Loss: 9.6314
Test Metrics: Precision=0.8773, Recall=0.8773, F1=0.8773

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 1...


Map: 100%|██████████| 220/220 [00:00<00:00, 9087.07 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6938.21 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9349.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.89it/s, loss=0.715]


Epoch 1 Loss: 24.5944
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.53it/s, loss=0.333]


Epoch 2 Loss: 12.3433
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.91it/s, loss=0.294]


Epoch 3 Loss: 9.6276
Test Metrics: Precision=0.8787, Recall=0.8787, F1=0.8787

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 2...


Map: 100%|██████████| 220/220 [00:00<00:00, 8929.32 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 4673.91 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9213.93 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.15it/s, loss=0.421]


Epoch 1 Loss: 22.5621
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.10it/s, loss=0.585]


Epoch 2 Loss: 11.7017
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.33it/s, loss=0.219]


Epoch 3 Loss: 9.2382
Test Metrics: Precision=0.8899, Recall=0.8899, F1=0.8899

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 3...


Map: 100%|██████████| 220/220 [00:00<00:00, 8495.42 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6538.74 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9535.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 10.96it/s, loss=0.558]


Epoch 1 Loss: 21.9017
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.67it/s, loss=0.326]


Epoch 2 Loss: 10.9919
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.52it/s, loss=0.364]


Epoch 3 Loss: 8.4526
Test Metrics: Precision=0.8926, Recall=0.8926, F1=0.8926

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 4...


Map: 100%|██████████| 220/220 [00:00<00:00, 9284.85 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6655.22 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9418.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.98it/s, loss=0.666]


Epoch 1 Loss: 20.8277
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 12.37it/s, loss=0.275]


Epoch 2 Loss: 10.0478
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 12.49it/s, loss=0.521]


Epoch 3 Loss: 8.2215
Test Metrics: Precision=0.8911, Recall=0.8911, F1=0.8911

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 5...


Map: 100%|██████████| 220/220 [00:00<00:00, 8892.66 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 5958.40 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9405.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.51it/s, loss=0.524]


Epoch 1 Loss: 23.5984
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.52it/s, loss=0.557]


Epoch 2 Loss: 12.8944
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.33it/s, loss=0.289]


Epoch 3 Loss: 10.1644
Test Metrics: Precision=0.8834, Recall=0.8834, F1=0.8834

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 225, Split 1...


Map: 100%|██████████| 225/225 [00:00<00:00, 7223.86 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 6213.17 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9453.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.48it/s, loss=0.405]


Epoch 1 Loss: 23.2029
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.80it/s, loss=0.12] 


Epoch 2 Loss: 11.1190
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.58it/s, loss=0.585]


Epoch 3 Loss: 9.3718
Test Metrics: Precision=0.8855, Recall=0.8855, F1=0.8855

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 225, Split 2...


Map: 100%|██████████| 225/225 [00:00<00:00, 8804.25 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 6386.18 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9348.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.44it/s, loss=0.208]


Epoch 1 Loss: 24.8329
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.23it/s, loss=0.138]


Epoch 2 Loss: 13.3860
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 12.06it/s, loss=0.939]


Epoch 3 Loss: 10.7043
Test Metrics: Precision=0.8836, Recall=0.8836, F1=0.8836

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 225, Split 3...


Map: 100%|██████████| 225/225 [00:00<00:00, 8736.43 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 6437.15 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9687.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.69it/s, loss=0.552]


Epoch 1 Loss: 24.9113
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.41it/s, loss=0.28] 


Epoch 2 Loss: 12.1423
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.27it/s, loss=0.865]


Epoch 3 Loss: 10.2099
Test Metrics: Precision=0.8884, Recall=0.8884, F1=0.8884

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 225, Split 4...


Map: 100%|██████████| 225/225 [00:00<00:00, 9506.58 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 5859.24 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9282.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 12.05it/s, loss=0.21] 


Epoch 1 Loss: 23.0411
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 12.71it/s, loss=0.205]


Epoch 2 Loss: 11.2661
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 12.33it/s, loss=0.612]


Epoch 3 Loss: 9.1889
Test Metrics: Precision=0.8875, Recall=0.8875, F1=0.8875

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 225, Split 5...


Map: 100%|██████████| 225/225 [00:00<00:00, 9334.23 examples/s]
Map: 100%|██████████| 45/45 [00:00<00:00, 6124.46 examples/s]
Map: 100%|██████████| 978/978 [00:00<00:00, 9471.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.30it/s, loss=0.231]


Epoch 1 Loss: 25.1501
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.27it/s, loss=0.829]


Epoch 2 Loss: 14.4309
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.64it/s, loss=0.702]


Epoch 3 Loss: 11.2935
Test Metrics: Precision=0.8786, Recall=0.8786, F1=0.8786

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 230, Split 1...


Map: 100%|██████████| 230/230 [00:00<00:00, 8798.46 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 5951.94 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9644.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.57it/s, loss=0.527]


Epoch 1 Loss: 23.7472
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.79it/s, loss=0.317]


Epoch 2 Loss: 12.2911
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.76it/s, loss=0.465]


Epoch 3 Loss: 9.6687
Test Metrics: Precision=0.8800, Recall=0.8800, F1=0.8800

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 230, Split 2...


Map: 100%|██████████| 230/230 [00:00<00:00, 8935.95 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6867.59 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9419.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.20it/s, loss=0.394]


Epoch 1 Loss: 23.5979
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.54it/s, loss=0.412]


Epoch 2 Loss: 13.7371
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.80it/s, loss=0.268]


Epoch 3 Loss: 10.4236
Test Metrics: Precision=0.8761, Recall=0.8761, F1=0.8761

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 230, Split 3...


Map: 100%|██████████| 230/230 [00:00<00:00, 8562.46 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6887.94 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9474.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.14it/s, loss=0.458]


Epoch 1 Loss: 25.1812
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.10it/s, loss=0.224]


Epoch 2 Loss: 12.1124
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.19it/s, loss=0.235]


Epoch 3 Loss: 9.2387
Test Metrics: Precision=0.8872, Recall=0.8872, F1=0.8872

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 230, Split 4...


Map: 100%|██████████| 230/230 [00:00<00:00, 9004.01 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6264.22 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9171.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.50it/s, loss=0.382]


Epoch 1 Loss: 22.3965
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 12.17it/s, loss=0.54] 


Epoch 2 Loss: 11.0483
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 12.26it/s, loss=0.344]


Epoch 3 Loss: 8.3496
Test Metrics: Precision=0.8903, Recall=0.8903, F1=0.8903

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 230, Split 5...


Map: 100%|██████████| 230/230 [00:00<00:00, 9332.31 examples/s]
Map: 100%|██████████| 46/46 [00:00<00:00, 6138.46 examples/s]
Map: 100%|██████████| 972/972 [00:00<00:00, 9226.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 29/29 [00:02<00:00, 11.36it/s, loss=0.564]


Epoch 1 Loss: 23.9555
Epoch 2/3


Training Epoch 2: 100%|██████████| 29/29 [00:02<00:00, 11.93it/s, loss=0.303]


Epoch 2 Loss: 12.8469
Epoch 3/3


Training Epoch 3: 100%|██████████| 29/29 [00:02<00:00, 11.83it/s, loss=0.249]


Epoch 3 Loss: 9.6044
Test Metrics: Precision=0.8906, Recall=0.8906, F1=0.8906

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 235, Split 1...


Map: 100%|██████████| 235/235 [00:00<00:00, 8740.38 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6587.10 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9556.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.61it/s, loss=0.483]


Epoch 1 Loss: 23.9580
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 12.05it/s, loss=0.454]


Epoch 2 Loss: 12.0564
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.49it/s, loss=0.324]


Epoch 3 Loss: 9.0481
Test Metrics: Precision=0.8895, Recall=0.8895, F1=0.8895

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 235, Split 2...


Map: 100%|██████████| 235/235 [00:00<00:00, 8757.62 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6694.02 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9375.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.89it/s, loss=0.355]


Epoch 1 Loss: 23.2818
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.99it/s, loss=0.329]


Epoch 2 Loss: 13.3635
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.60it/s, loss=0.456]


Epoch 3 Loss: 10.6969
Test Metrics: Precision=0.8809, Recall=0.8809, F1=0.8809

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 235, Split 3...


Map: 100%|██████████| 235/235 [00:00<00:00, 8560.55 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6674.76 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9401.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.27it/s, loss=0.356]


Epoch 1 Loss: 24.8331
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.38it/s, loss=0.47] 


Epoch 2 Loss: 12.5471
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.39it/s, loss=0.43] 


Epoch 3 Loss: 9.5917
Test Metrics: Precision=0.8871, Recall=0.8871, F1=0.8871

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 235, Split 4...


Map: 100%|██████████| 235/235 [00:00<00:00, 9424.14 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 6662.80 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9285.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.89it/s, loss=0.521]


Epoch 1 Loss: 25.9879
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.93it/s, loss=0.357]


Epoch 2 Loss: 12.8391
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 12.06it/s, loss=0.318]


Epoch 3 Loss: 9.9928
Test Metrics: Precision=0.8815, Recall=0.8815, F1=0.8815

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 235, Split 5...


Map: 100%|██████████| 235/235 [00:00<00:00, 9109.46 examples/s]
Map: 100%|██████████| 47/47 [00:00<00:00, 7059.60 examples/s]
Map: 100%|██████████| 966/966 [00:00<00:00, 9510.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.12it/s, loss=0.785]


Epoch 1 Loss: 28.5294
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.41it/s, loss=0.472]


Epoch 2 Loss: 15.2440
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.24it/s, loss=0.385]


Epoch 3 Loss: 12.0023
Test Metrics: Precision=0.8772, Recall=0.8772, F1=0.8772

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 1...


Map: 100%|██████████| 240/240 [00:00<00:00, 8856.84 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6934.88 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9454.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.31it/s, loss=0.51] 


Epoch 1 Loss: 22.8715
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.49it/s, loss=0.487]


Epoch 2 Loss: 12.1638
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.45it/s, loss=0.276]


Epoch 3 Loss: 9.8282
Test Metrics: Precision=0.8865, Recall=0.8865, F1=0.8865

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 2...


Map: 100%|██████████| 240/240 [00:00<00:00, 8750.97 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5978.34 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 8847.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.18it/s, loss=0.799]


Epoch 1 Loss: 24.7343
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.81it/s, loss=0.591]


Epoch 2 Loss: 12.7317
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.05it/s, loss=0.278]


Epoch 3 Loss: 9.8732
Test Metrics: Precision=0.8835, Recall=0.8835, F1=0.8835

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 3...


Map: 100%|██████████| 240/240 [00:00<00:00, 8429.07 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6405.76 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9270.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.08it/s, loss=0.425]


Epoch 1 Loss: 25.0710
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.72it/s, loss=0.509]


Epoch 2 Loss: 12.6764
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.38it/s, loss=0.242]


Epoch 3 Loss: 10.0173
Test Metrics: Precision=0.8849, Recall=0.8849, F1=0.8849

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 4...


Map: 100%|██████████| 240/240 [00:00<00:00, 9392.69 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5533.08 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9263.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 12.05it/s, loss=0.431]


Epoch 1 Loss: 23.2512
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 12.10it/s, loss=0.332]


Epoch 2 Loss: 10.9127
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 12.29it/s, loss=0.309]


Epoch 3 Loss: 8.3283
Test Metrics: Precision=0.8943, Recall=0.8943, F1=0.8943

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 5...


Map: 100%|██████████| 240/240 [00:00<00:00, 9130.95 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5392.87 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9239.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.69it/s, loss=0.304]


Epoch 1 Loss: 25.2587
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.20it/s, loss=0.752]


Epoch 2 Loss: 13.2862
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.04it/s, loss=0.452]


Epoch 3 Loss: 10.2178
Test Metrics: Precision=0.8892, Recall=0.8892, F1=0.8892

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 245, Split 1...


Map: 100%|██████████| 245/245 [00:00<00:00, 8828.44 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 7340.29 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9516.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 31/31 [00:02<00:00, 11.38it/s, loss=0.482]


Epoch 1 Loss: 23.4508
Epoch 2/3


Training Epoch 2: 100%|██████████| 31/31 [00:02<00:00, 12.00it/s, loss=0.412]


Epoch 2 Loss: 12.8903
Epoch 3/3


Training Epoch 3: 100%|██████████| 31/31 [00:02<00:00, 11.46it/s, loss=0.367]


Epoch 3 Loss: 10.0388
Test Metrics: Precision=0.8826, Recall=0.8826, F1=0.8826

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 245, Split 2...


Map: 100%|██████████| 245/245 [00:00<00:00, 9242.63 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6771.92 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9186.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 31/31 [00:02<00:00, 11.47it/s, loss=0.381]


Epoch 1 Loss: 27.2217
Epoch 2/3


Training Epoch 2: 100%|██████████| 31/31 [00:02<00:00, 11.49it/s, loss=0.526]


Epoch 2 Loss: 13.8169
Epoch 3/3


Training Epoch 3: 100%|██████████| 31/31 [00:02<00:00, 10.97it/s, loss=0.199]


Epoch 3 Loss: 10.9458
Test Metrics: Precision=0.8822, Recall=0.8822, F1=0.8822

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 245, Split 3...


Map: 100%|██████████| 245/245 [00:00<00:00, 9019.53 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 7011.49 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 3270.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 31/31 [00:02<00:00, 10.90it/s, loss=0.618]


Epoch 1 Loss: 24.4058
Epoch 2/3


Training Epoch 2: 100%|██████████| 31/31 [00:02<00:00, 11.38it/s, loss=0.269]


Epoch 2 Loss: 11.9631
Epoch 3/3


Training Epoch 3: 100%|██████████| 31/31 [00:02<00:00, 11.53it/s, loss=0.462]


Epoch 3 Loss: 9.3225
Test Metrics: Precision=0.8910, Recall=0.8910, F1=0.8910

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 245, Split 4...


Map: 100%|██████████| 245/245 [00:00<00:00, 9339.99 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6443.07 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9218.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 31/31 [00:02<00:00, 11.90it/s, loss=0.433]


Epoch 1 Loss: 22.1761
Epoch 2/3


Training Epoch 2: 100%|██████████| 31/31 [00:02<00:00, 12.37it/s, loss=0.379]


Epoch 2 Loss: 10.8248
Epoch 3/3


Training Epoch 3: 100%|██████████| 31/31 [00:02<00:00, 12.32it/s, loss=0.264]


Epoch 3 Loss: 8.3709
Test Metrics: Precision=0.8936, Recall=0.8936, F1=0.8936

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 245, Split 5...


Map: 100%|██████████| 245/245 [00:00<00:00, 9357.17 examples/s]
Map: 100%|██████████| 49/49 [00:00<00:00, 6319.83 examples/s]
Map: 100%|██████████| 954/954 [00:00<00:00, 9458.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 31/31 [00:02<00:00, 10.95it/s, loss=0.644]


Epoch 1 Loss: 27.7254
Epoch 2/3


Training Epoch 2: 100%|██████████| 31/31 [00:02<00:00, 11.74it/s, loss=0.286]


Epoch 2 Loss: 14.1849
Epoch 3/3


Training Epoch 3: 100%|██████████| 31/31 [00:02<00:00, 11.17it/s, loss=0.383]


Epoch 3 Loss: 10.9350
Test Metrics: Precision=0.8878, Recall=0.8878, F1=0.8878

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 250, Split 1...


Map: 100%|██████████| 250/250 [00:00<00:00, 9037.81 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 7156.05 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9451.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.42it/s, loss=0.486]


Epoch 1 Loss: 24.8222
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.51it/s, loss=0.264]


Epoch 2 Loss: 12.3719
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.90it/s, loss=0.149]


Epoch 3 Loss: 9.6133
Test Metrics: Precision=0.8916, Recall=0.8916, F1=0.8916

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 250, Split 2...


Map: 100%|██████████| 250/250 [00:00<00:00, 8877.44 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 7085.21 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9386.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.94it/s, loss=0.464]


Epoch 1 Loss: 28.1849
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 12.22it/s, loss=0.498]


Epoch 2 Loss: 13.7677
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.15it/s, loss=0.368]


Epoch 3 Loss: 10.7440
Test Metrics: Precision=0.8814, Recall=0.8814, F1=0.8814

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 250, Split 3...


Map: 100%|██████████| 250/250 [00:00<00:00, 8291.96 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 6573.32 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9067.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.62it/s, loss=0.332]


Epoch 1 Loss: 26.7173
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.41it/s, loss=0.467]


Epoch 2 Loss: 12.9903
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.56it/s, loss=0.245]


Epoch 3 Loss: 9.6131
Test Metrics: Precision=0.8922, Recall=0.8922, F1=0.8922

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 250, Split 4...


Map: 100%|██████████| 250/250 [00:00<00:00, 9484.05 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 5340.34 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9408.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 12.37it/s, loss=0.352]


Epoch 1 Loss: 25.4692
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 12.89it/s, loss=0.291]


Epoch 2 Loss: 11.9106
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 12.06it/s, loss=0.319]


Epoch 3 Loss: 9.0397
Test Metrics: Precision=0.8945, Recall=0.8945, F1=0.8945

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 250, Split 5...


Map: 100%|██████████| 250/250 [00:00<00:00, 8791.10 examples/s]
Map: 100%|██████████| 50/50 [00:00<00:00, 5753.82 examples/s]
Map: 100%|██████████| 948/948 [00:00<00:00, 9261.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.26it/s, loss=1.13] 


Epoch 1 Loss: 27.3198
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.42it/s, loss=0.811]


Epoch 2 Loss: 13.9568
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.56it/s, loss=0.346]


Epoch 3 Loss: 10.7235
Test Metrics: Precision=0.8926, Recall=0.8926, F1=0.8926

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 255, Split 1...


Map: 100%|██████████| 255/255 [00:00<00:00, 7647.55 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6528.40 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 8995.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.42it/s, loss=0.373]


Epoch 1 Loss: 23.4427
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.51it/s, loss=0.351]


Epoch 2 Loss: 12.4988
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.64it/s, loss=0.245]


Epoch 3 Loss: 9.3200
Test Metrics: Precision=0.8912, Recall=0.8912, F1=0.8912

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 255, Split 2...


Map: 100%|██████████| 255/255 [00:00<00:00, 9173.74 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6628.13 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9461.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.31it/s, loss=0.558]


Epoch 1 Loss: 24.2442
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.15it/s, loss=0.447]


Epoch 2 Loss: 13.0978
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.32it/s, loss=0.411]


Epoch 3 Loss: 10.3785
Test Metrics: Precision=0.8877, Recall=0.8877, F1=0.8877

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 255, Split 3...


Map: 100%|██████████| 255/255 [00:00<00:00, 8774.55 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6899.42 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9399.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.45it/s, loss=0.542]


Epoch 1 Loss: 24.1994
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.46it/s, loss=0.241]


Epoch 2 Loss: 12.0736
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.48it/s, loss=0.281]


Epoch 3 Loss: 9.4815
Test Metrics: Precision=0.8926, Recall=0.8926, F1=0.8926

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 255, Split 4...


Map: 100%|██████████| 255/255 [00:00<00:00, 9826.43 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6696.18 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9195.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 12.04it/s, loss=0.49] 


Epoch 1 Loss: 24.8093
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 12.17it/s, loss=0.365]


Epoch 2 Loss: 11.4579
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 12.58it/s, loss=0.362]


Epoch 3 Loss: 9.1380
Test Metrics: Precision=0.8932, Recall=0.8932, F1=0.8932

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 255, Split 5...


Map: 100%|██████████| 255/255 [00:00<00:00, 9293.79 examples/s]
Map: 100%|██████████| 51/51 [00:00<00:00, 6348.03 examples/s]
Map: 100%|██████████| 942/942 [00:00<00:00, 9370.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 32/32 [00:02<00:00, 11.09it/s, loss=0.437]


Epoch 1 Loss: 26.3536
Epoch 2/3


Training Epoch 2: 100%|██████████| 32/32 [00:02<00:00, 11.43it/s, loss=0.356]


Epoch 2 Loss: 13.9504
Epoch 3/3


Training Epoch 3: 100%|██████████| 32/32 [00:02<00:00, 11.18it/s, loss=0.531]


Epoch 3 Loss: 10.6829
Test Metrics: Precision=0.8904, Recall=0.8904, F1=0.8904

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 1...


Map: 100%|██████████| 260/260 [00:00<00:00, 8789.40 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 5390.20 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9587.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.64it/s, loss=0.705]


Epoch 1 Loss: 26.6596
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.57it/s, loss=0.529]


Epoch 2 Loss: 13.4068
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.42it/s, loss=0.31] 


Epoch 3 Loss: 9.7602
Test Metrics: Precision=0.8967, Recall=0.8967, F1=0.8967

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 2...


Map: 100%|██████████| 260/260 [00:00<00:00, 9029.87 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6513.09 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9296.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.26it/s, loss=0.699]


Epoch 1 Loss: 26.0135
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.38it/s, loss=0.241]


Epoch 2 Loss: 13.9479
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.50it/s, loss=0.298]


Epoch 3 Loss: 10.5833
Test Metrics: Precision=0.8837, Recall=0.8837, F1=0.8837

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 3...


Map: 100%|██████████| 260/260 [00:00<00:00, 8731.77 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6985.80 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9153.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.47it/s, loss=0.356]


Epoch 1 Loss: 24.6962
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.62it/s, loss=0.327]


Epoch 2 Loss: 12.8690
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.55it/s, loss=0.188]


Epoch 3 Loss: 9.8889
Test Metrics: Precision=0.8907, Recall=0.8907, F1=0.8907

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 4...


Map: 100%|██████████| 260/260 [00:00<00:00, 9749.92 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6891.77 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9096.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 12.37it/s, loss=0.378]


Epoch 1 Loss: 23.6694
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 12.75it/s, loss=0.356]


Epoch 2 Loss: 11.5983
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 12.63it/s, loss=0.182]


Epoch 3 Loss: 8.6119
Test Metrics: Precision=0.8943, Recall=0.8943, F1=0.8943

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 5...


Map: 100%|██████████| 260/260 [00:00<00:00, 8870.62 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 7604.74 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9329.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.23it/s, loss=0.686]


Epoch 1 Loss: 27.9832
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.49it/s, loss=0.223]


Epoch 2 Loss: 14.8183
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.11it/s, loss=0.442]


Epoch 3 Loss: 11.8106
Test Metrics: Precision=0.8839, Recall=0.8839, F1=0.8839

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 265, Split 1...


Map: 100%|██████████| 265/265 [00:00<00:00, 8644.96 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6782.76 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9598.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 11.70it/s, loss=0.464]


Epoch 1 Loss: 26.5797
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 11.73it/s, loss=1.28] 


Epoch 2 Loss: 14.4705
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 12.13it/s, loss=0.0784]


Epoch 3 Loss: 10.4170
Test Metrics: Precision=0.8892, Recall=0.8892, F1=0.8892

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 265, Split 2...


Map: 100%|██████████| 265/265 [00:00<00:00, 8920.18 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6749.19 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9519.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 11.76it/s, loss=0.248]


Epoch 1 Loss: 26.4503
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:03<00:00, 11.06it/s, loss=0.229]


Epoch 2 Loss: 12.7405
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 11.57it/s, loss=0.164]


Epoch 3 Loss: 9.8955
Test Metrics: Precision=0.8921, Recall=0.8921, F1=0.8921

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 265, Split 3...


Map: 100%|██████████| 265/265 [00:00<00:00, 8858.47 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6060.14 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9598.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 11.64it/s, loss=0.35] 


Epoch 1 Loss: 25.5444
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 11.80it/s, loss=0.441]


Epoch 2 Loss: 13.2121
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 11.64it/s, loss=0.198]


Epoch 3 Loss: 9.8400
Test Metrics: Precision=0.8943, Recall=0.8943, F1=0.8943

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 265, Split 4...


Map: 100%|██████████| 265/265 [00:00<00:00, 9506.58 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6675.42 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9040.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 12.29it/s, loss=0.446]


Epoch 1 Loss: 25.1415
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 12.26it/s, loss=0.263]


Epoch 2 Loss: 12.4556
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 12.50it/s, loss=0.221]


Epoch 3 Loss: 9.7101
Test Metrics: Precision=0.8963, Recall=0.8963, F1=0.8963

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 265, Split 5...


Map: 100%|██████████| 265/265 [00:00<00:00, 9178.44 examples/s]
Map: 100%|██████████| 53/53 [00:00<00:00, 6258.92 examples/s]
Map: 100%|██████████| 930/930 [00:00<00:00, 9529.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:03<00:00, 11.18it/s, loss=0.56] 


Epoch 1 Loss: 27.0780
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 11.54it/s, loss=0.268]


Epoch 2 Loss: 15.4458
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 11.34it/s, loss=0.423]


Epoch 3 Loss: 12.3972
Test Metrics: Precision=0.8877, Recall=0.8877, F1=0.8877

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 270, Split 1...


Map: 100%|██████████| 270/270 [00:00<00:00, 9025.69 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 7968.91 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9377.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 11.58it/s, loss=0.487]


Epoch 1 Loss: 26.3398
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 11.54it/s, loss=0.306]


Epoch 2 Loss: 13.2253
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 11.56it/s, loss=0.292]


Epoch 3 Loss: 10.3150
Test Metrics: Precision=0.8936, Recall=0.8936, F1=0.8936

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 270, Split 2...


Map: 100%|██████████| 270/270 [00:00<00:00, 8919.06 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 5580.69 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9465.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:03<00:00, 11.21it/s, loss=0.332]


Epoch 1 Loss: 25.9975
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 11.42it/s, loss=0.33] 


Epoch 2 Loss: 13.2999
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 11.67it/s, loss=0.587]


Epoch 3 Loss: 10.2874
Test Metrics: Precision=0.8909, Recall=0.8909, F1=0.8909

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 270, Split 3...


Map: 100%|██████████| 270/270 [00:00<00:00, 8748.93 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 7336.02 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9381.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 11.57it/s, loss=0.475]


Epoch 1 Loss: 26.4693
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 11.63it/s, loss=0.354]


Epoch 2 Loss: 12.6905
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 11.62it/s, loss=0.255]


Epoch 3 Loss: 9.7666
Test Metrics: Precision=0.8904, Recall=0.8904, F1=0.8904

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 270, Split 4...


Map: 100%|██████████| 270/270 [00:00<00:00, 9669.25 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6164.40 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9219.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:02<00:00, 12.46it/s, loss=0.52] 


Epoch 1 Loss: 26.6500
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:02<00:00, 12.23it/s, loss=0.319]


Epoch 2 Loss: 12.8811
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:02<00:00, 12.40it/s, loss=0.404]


Epoch 3 Loss: 9.6891
Test Metrics: Precision=0.8939, Recall=0.8939, F1=0.8939

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 270, Split 5...


Map: 100%|██████████| 270/270 [00:00<00:00, 9005.45 examples/s]
Map: 100%|██████████| 54/54 [00:00<00:00, 6624.13 examples/s]
Map: 100%|██████████| 924/924 [00:00<00:00, 9368.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 34/34 [00:03<00:00, 11.10it/s, loss=0.723]


Epoch 1 Loss: 26.7385
Epoch 2/3


Training Epoch 2: 100%|██████████| 34/34 [00:03<00:00, 11.00it/s, loss=0.654]


Epoch 2 Loss: 14.3695
Epoch 3/3


Training Epoch 3: 100%|██████████| 34/34 [00:03<00:00, 11.26it/s, loss=0.253]


Epoch 3 Loss: 11.3149
Test Metrics: Precision=0.8906, Recall=0.8906, F1=0.8906

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 275, Split 1...


Map: 100%|██████████| 275/275 [00:00<00:00, 8409.46 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 6769.18 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9257.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:02<00:00, 11.69it/s, loss=0.508]


Epoch 1 Loss: 26.8327
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:02<00:00, 11.69it/s, loss=0.352]


Epoch 2 Loss: 13.9533
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:02<00:00, 11.69it/s, loss=0.557]


Epoch 3 Loss: 10.7296
Test Metrics: Precision=0.8938, Recall=0.8938, F1=0.8938

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 275, Split 2...


Map: 100%|██████████| 275/275 [00:00<00:00, 8841.42 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 5371.55 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9331.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.12it/s, loss=0.292]


Epoch 1 Loss: 26.6507
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:02<00:00, 11.69it/s, loss=0.344]


Epoch 2 Loss: 13.5201
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.59it/s, loss=0.233]


Epoch 3 Loss: 10.5083
Test Metrics: Precision=0.8918, Recall=0.8918, F1=0.8918

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 275, Split 3...


Map: 100%|██████████| 275/275 [00:00<00:00, 8894.46 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 8221.49 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9350.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.39it/s, loss=0.498]


Epoch 1 Loss: 25.7435
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.58it/s, loss=0.268]


Epoch 2 Loss: 13.0261
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:02<00:00, 11.72it/s, loss=0.186]


Epoch 3 Loss: 9.5754
Test Metrics: Precision=0.8992, Recall=0.8992, F1=0.8992

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 275, Split 4...


Map: 100%|██████████| 275/275 [00:00<00:00, 9882.73 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 7015.38 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9199.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:02<00:00, 12.90it/s, loss=0.386]


Epoch 1 Loss: 25.8745
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:02<00:00, 12.84it/s, loss=0.266]


Epoch 2 Loss: 12.4264
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:02<00:00, 12.81it/s, loss=0.302]


Epoch 3 Loss: 9.4140
Test Metrics: Precision=0.8967, Recall=0.8967, F1=0.8967

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 275, Split 5...


Map: 100%|██████████| 275/275 [00:00<00:00, 8824.10 examples/s]
Map: 100%|██████████| 55/55 [00:00<00:00, 7078.02 examples/s]
Map: 100%|██████████| 918/918 [00:00<00:00, 9308.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.39it/s, loss=0.418]


Epoch 1 Loss: 26.0862
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.31it/s, loss=0.133]


Epoch 2 Loss: 14.6795
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.36it/s, loss=0.395]


Epoch 3 Loss: 11.6271
Test Metrics: Precision=0.8966, Recall=0.8966, F1=0.8966

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 1...


Map: 100%|██████████| 280/280 [00:00<00:00, 8738.07 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6791.02 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9462.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.62it/s, loss=0.361]


Epoch 1 Loss: 28.1705
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.40it/s, loss=0.641]


Epoch 2 Loss: 14.6445
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.46it/s, loss=0.318]


Epoch 3 Loss: 11.4778
Test Metrics: Precision=0.8900, Recall=0.8900, F1=0.8900

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 2...


Map: 100%|██████████| 280/280 [00:00<00:00, 8642.13 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 5931.04 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9645.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.37it/s, loss=0.558]


Epoch 1 Loss: 27.4091
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.16it/s, loss=0.616]


Epoch 2 Loss: 13.9090
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.22it/s, loss=0.204]


Epoch 3 Loss: 10.5767
Test Metrics: Precision=0.8976, Recall=0.8976, F1=0.8976

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 3...


Map: 100%|██████████| 280/280 [00:00<00:00, 9019.25 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6765.98 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9680.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.45it/s, loss=0.523]


Epoch 1 Loss: 27.8008
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:02<00:00, 11.87it/s, loss=0.372]


Epoch 2 Loss: 13.6097
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.34it/s, loss=0.248]


Epoch 3 Loss: 10.2909
Test Metrics: Precision=0.8927, Recall=0.8927, F1=0.8927

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 4...


Map: 100%|██████████| 280/280 [00:00<00:00, 9795.85 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7077.50 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9084.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:02<00:00, 12.15it/s, loss=0.387]


Epoch 1 Loss: 26.9965
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:02<00:00, 12.42it/s, loss=0.382]


Epoch 2 Loss: 12.4897
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:02<00:00, 12.37it/s, loss=0.304]


Epoch 3 Loss: 9.5013
Test Metrics: Precision=0.8980, Recall=0.8980, F1=0.8980

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 5...


Map: 100%|██████████| 280/280 [00:00<00:00, 8826.86 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7174.35 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9315.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 10.81it/s, loss=0.777]


Epoch 1 Loss: 26.4258
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.44it/s, loss=0.21] 


Epoch 2 Loss: 13.8093
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.19it/s, loss=0.356]


Epoch 3 Loss: 11.0046
Test Metrics: Precision=0.8930, Recall=0.8930, F1=0.8930

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 285, Split 1...


Map: 100%|██████████| 285/285 [00:00<00:00, 8809.81 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 7475.78 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9281.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 36/36 [00:03<00:00, 11.68it/s, loss=0.482]


Epoch 1 Loss: 26.2840
Epoch 2/3


Training Epoch 2: 100%|██████████| 36/36 [00:03<00:00, 11.85it/s, loss=0.318]


Epoch 2 Loss: 12.9501
Epoch 3/3


Training Epoch 3: 100%|██████████| 36/36 [00:03<00:00, 11.75it/s, loss=0.135]


Epoch 3 Loss: 9.7428
Test Metrics: Precision=0.9005, Recall=0.9005, F1=0.9005

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 285, Split 2...


Map: 100%|██████████| 285/285 [00:00<00:00, 7934.64 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6146.05 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 8692.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 36/36 [00:03<00:00, 11.62it/s, loss=0.359]


Epoch 1 Loss: 27.7674
Epoch 2/3


Training Epoch 2: 100%|██████████| 36/36 [00:03<00:00, 11.69it/s, loss=0.229]


Epoch 2 Loss: 14.0915
Epoch 3/3


Training Epoch 3: 100%|██████████| 36/36 [00:03<00:00, 11.41it/s, loss=0.367]


Epoch 3 Loss: 11.0231
Test Metrics: Precision=0.8916, Recall=0.8916, F1=0.8916

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 285, Split 3...


Map: 100%|██████████| 285/285 [00:00<00:00, 8975.24 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6911.49 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9457.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 36/36 [00:03<00:00, 11.47it/s, loss=0.274]


Epoch 1 Loss: 27.1682
Epoch 2/3


Training Epoch 2: 100%|██████████| 36/36 [00:02<00:00, 12.05it/s, loss=0.361]


Epoch 2 Loss: 13.5967
Epoch 3/3


Training Epoch 3: 100%|██████████| 36/36 [00:03<00:00, 11.94it/s, loss=0.542]


Epoch 3 Loss: 10.5629
Test Metrics: Precision=0.8932, Recall=0.8932, F1=0.8932

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 285, Split 4...


Map: 100%|██████████| 285/285 [00:00<00:00, 9944.81 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6640.98 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 3196.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 36/36 [00:02<00:00, 12.40it/s, loss=0.543]


Epoch 1 Loss: 27.7804
Epoch 2/3


Training Epoch 2: 100%|██████████| 36/36 [00:03<00:00, 11.92it/s, loss=0.165]


Epoch 2 Loss: 12.9355
Epoch 3/3


Training Epoch 3: 100%|██████████| 36/36 [00:02<00:00, 12.38it/s, loss=0.209]


Epoch 3 Loss: 9.6756
Test Metrics: Precision=0.8980, Recall=0.8980, F1=0.8980

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 285, Split 5...


Map: 100%|██████████| 285/285 [00:00<00:00, 9275.11 examples/s]
Map: 100%|██████████| 57/57 [00:00<00:00, 6889.98 examples/s]
Map: 100%|██████████| 906/906 [00:00<00:00, 9520.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 36/36 [00:03<00:00, 11.26it/s, loss=0.861]


Epoch 1 Loss: 29.6465
Epoch 2/3


Training Epoch 2: 100%|██████████| 36/36 [00:03<00:00, 11.24it/s, loss=0.339]


Epoch 2 Loss: 15.4845
Epoch 3/3


Training Epoch 3: 100%|██████████| 36/36 [00:03<00:00, 11.19it/s, loss=0.486]


Epoch 3 Loss: 12.3694
Test Metrics: Precision=0.8912, Recall=0.8912, F1=0.8912

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 290, Split 1...


Map: 100%|██████████| 290/290 [00:00<00:00, 9073.50 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6882.52 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9505.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 11.63it/s, loss=0.292]


Epoch 1 Loss: 28.6940
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 11.87it/s, loss=0.341]


Epoch 2 Loss: 15.8242
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.97it/s, loss=0.348]


Epoch 3 Loss: 12.3870
Test Metrics: Precision=0.8866, Recall=0.8866, F1=0.8866

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 290, Split 2...


Map: 100%|██████████| 290/290 [00:00<00:00, 8746.62 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 7428.76 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9321.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 12.00it/s, loss=0.563]


Epoch 1 Loss: 28.4922
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 12.11it/s, loss=0.0836]


Epoch 2 Loss: 14.3624
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.52it/s, loss=0.261]


Epoch 3 Loss: 11.3275
Test Metrics: Precision=0.8952, Recall=0.8952, F1=0.8952

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 290, Split 3...


Map: 100%|██████████| 290/290 [00:00<00:00, 9209.94 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6906.95 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9606.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 11.16it/s, loss=0.701]


Epoch 1 Loss: 29.1340
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 11.61it/s, loss=0.441]


Epoch 2 Loss: 13.8276
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.69it/s, loss=0.435]


Epoch 3 Loss: 10.7453
Test Metrics: Precision=0.8980, Recall=0.8980, F1=0.8980

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 290, Split 4...


Map: 100%|██████████| 290/290 [00:00<00:00, 10037.12 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 5714.31 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9298.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:02<00:00, 12.38it/s, loss=0.239]


Epoch 1 Loss: 27.5967
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:02<00:00, 13.20it/s, loss=0.191]


Epoch 2 Loss: 12.3106
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:02<00:00, 12.67it/s, loss=0.263]


Epoch 3 Loss: 9.6124
Test Metrics: Precision=0.8982, Recall=0.8982, F1=0.8982

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 290, Split 5...


Map: 100%|██████████| 290/290 [00:00<00:00, 9190.46 examples/s]
Map: 100%|██████████| 58/58 [00:00<00:00, 6529.85 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 9295.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 11.44it/s, loss=0.867]


Epoch 1 Loss: 27.9047
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 10.98it/s, loss=0.343]


Epoch 2 Loss: 14.8894
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.35it/s, loss=0.307]


Epoch 3 Loss: 11.5679
Test Metrics: Precision=0.9002, Recall=0.9002, F1=0.9002

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 295, Split 1...


Map: 100%|██████████| 295/295 [00:00<00:00, 8430.68 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7322.50 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9436.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 11.54it/s, loss=0.348]


Epoch 1 Loss: 27.5435
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 11.80it/s, loss=0.273]


Epoch 2 Loss: 13.8549
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.39it/s, loss=0.198]


Epoch 3 Loss: 10.7585
Test Metrics: Precision=0.8956, Recall=0.8956, F1=0.8956

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 295, Split 2...


Map: 100%|██████████| 295/295 [00:00<00:00, 8935.72 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 5350.46 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9344.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 11.26it/s, loss=0.537]


Epoch 1 Loss: 28.0464
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 11.40it/s, loss=0.409]


Epoch 2 Loss: 14.4623
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.62it/s, loss=0.294]


Epoch 3 Loss: 11.3193
Test Metrics: Precision=0.8911, Recall=0.8911, F1=0.8911

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 295, Split 3...


Map: 100%|██████████| 295/295 [00:00<00:00, 9201.52 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7416.87 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9429.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 11.22it/s, loss=0.375]


Epoch 1 Loss: 26.0009
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 11.18it/s, loss=0.272]


Epoch 2 Loss: 12.7370
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.87it/s, loss=0.195]


Epoch 3 Loss: 9.6526
Test Metrics: Precision=0.9017, Recall=0.9017, F1=0.9017

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 295, Split 4...


Map: 100%|██████████| 295/295 [00:00<00:00, 9936.32 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7041.63 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9121.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 12.29it/s, loss=0.494]


Epoch 1 Loss: 28.0043
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:02<00:00, 12.54it/s, loss=0.414]


Epoch 2 Loss: 13.4815
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 12.25it/s, loss=0.34] 


Epoch 3 Loss: 9.9406
Test Metrics: Precision=0.8941, Recall=0.8941, F1=0.8941

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 295, Split 5...


Map: 100%|██████████| 295/295 [00:00<00:00, 9148.32 examples/s]
Map: 100%|██████████| 59/59 [00:00<00:00, 7067.77 examples/s]
Map: 100%|██████████| 894/894 [00:00<00:00, 9288.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 37/37 [00:03<00:00, 10.97it/s, loss=0.481]


Epoch 1 Loss: 28.8806
Epoch 2/3


Training Epoch 2: 100%|██████████| 37/37 [00:03<00:00, 11.02it/s, loss=0.411]


Epoch 2 Loss: 14.6573
Epoch 3/3


Training Epoch 3: 100%|██████████| 37/37 [00:03<00:00, 11.10it/s, loss=0.256]


Epoch 3 Loss: 10.9084
Test Metrics: Precision=0.9005, Recall=0.9005, F1=0.9005

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 1...


Map: 100%|██████████| 300/300 [00:00<00:00, 8871.01 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7332.91 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9483.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 12.22it/s, loss=0.183]


Epoch 1 Loss: 29.5326
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.85it/s, loss=0.196]


Epoch 2 Loss: 14.2074
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.90it/s, loss=0.26] 


Epoch 3 Loss: 10.6907
Test Metrics: Precision=0.9015, Recall=0.9015, F1=0.9015

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 2...


Map: 100%|██████████| 300/300 [00:00<00:00, 8982.40 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6140.40 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9542.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.46it/s, loss=0.8]  


Epoch 1 Loss: 28.2377
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.46it/s, loss=0.375]


Epoch 2 Loss: 15.6491
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.37it/s, loss=0.403]


Epoch 3 Loss: 12.1864
Test Metrics: Precision=0.8881, Recall=0.8881, F1=0.8881

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 3...


Map: 100%|██████████| 300/300 [00:00<00:00, 9050.31 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6954.38 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9304.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.27it/s, loss=0.365]


Epoch 1 Loss: 27.5557
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.53it/s, loss=0.236]


Epoch 2 Loss: 13.7650
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.62it/s, loss=0.496]


Epoch 3 Loss: 10.1077
Test Metrics: Precision=0.8959, Recall=0.8959, F1=0.8959

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 4...


Map: 100%|██████████| 300/300 [00:00<00:00, 9260.24 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6629.74 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 8998.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:02<00:00, 12.86it/s, loss=0.262]


Epoch 1 Loss: 27.8125
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:02<00:00, 12.74it/s, loss=0.418]


Epoch 2 Loss: 13.5878
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:02<00:00, 12.81it/s, loss=0.303]


Epoch 3 Loss: 10.3020
Test Metrics: Precision=0.8983, Recall=0.8983, F1=0.8983

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 5...


Map: 100%|██████████| 300/300 [00:00<00:00, 9130.16 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7395.19 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9440.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.06it/s, loss=0.257]


Epoch 1 Loss: 29.1805
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.02it/s, loss=0.3]  


Epoch 2 Loss: 15.0191
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.10it/s, loss=0.145]


Epoch 3 Loss: 11.2194
Test Metrics: Precision=0.8993, Recall=0.8993, F1=0.8993

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 305, Split 1...


Map: 100%|██████████| 305/305 [00:00<00:00, 9218.78 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7394.80 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9642.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 11.65it/s, loss=0.719]


Epoch 1 Loss: 29.2276
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 12.25it/s, loss=0.195]


Epoch 2 Loss: 13.9721
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 11.81it/s, loss=0.435]


Epoch 3 Loss: 11.1705
Test Metrics: Precision=0.8975, Recall=0.8975, F1=0.8975

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 305, Split 2...


Map: 100%|██████████| 305/305 [00:00<00:00, 9077.68 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7325.56 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9453.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 11.56it/s, loss=0.26] 


Epoch 1 Loss: 26.4462
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 11.95it/s, loss=0.313]


Epoch 2 Loss: 13.6765
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 11.99it/s, loss=0.113] 


Epoch 3 Loss: 10.1502
Test Metrics: Precision=0.8998, Recall=0.8998, F1=0.8998

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 305, Split 3...


Map: 100%|██████████| 305/305 [00:00<00:00, 9239.22 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7565.35 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9344.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 11.50it/s, loss=0.578]


Epoch 1 Loss: 26.6001
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 12.13it/s, loss=0.186]


Epoch 2 Loss: 13.8489
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 11.55it/s, loss=0.265]


Epoch 3 Loss: 10.8555
Test Metrics: Precision=0.8987, Recall=0.8987, F1=0.8987

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 305, Split 4...


Map: 100%|██████████| 305/305 [00:00<00:00, 9825.82 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7372.00 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9137.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 12.55it/s, loss=0.0504]


Epoch 1 Loss: 27.2664
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 12.67it/s, loss=0.304]


Epoch 2 Loss: 13.5086
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:02<00:00, 13.32it/s, loss=0.192]


Epoch 3 Loss: 10.0499
Test Metrics: Precision=0.8995, Recall=0.8995, F1=0.8995

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 305, Split 5...


Map: 100%|██████████| 305/305 [00:00<00:00, 8703.65 examples/s]
Map: 100%|██████████| 61/61 [00:00<00:00, 7133.37 examples/s]
Map: 100%|██████████| 882/882 [00:00<00:00, 9353.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 10.81it/s, loss=0.414]


Epoch 1 Loss: 29.2973
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 11.18it/s, loss=0.66] 


Epoch 2 Loss: 15.9539
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 11.07it/s, loss=0.328]


Epoch 3 Loss: 12.0553
Test Metrics: Precision=0.8920, Recall=0.8920, F1=0.8920

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 310, Split 1...


Map: 100%|██████████| 310/310 [00:00<00:00, 8638.68 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 5428.50 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9412.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 11.70it/s, loss=0.238]


Epoch 1 Loss: 26.9815
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 12.00it/s, loss=0.286]


Epoch 2 Loss: 13.6726
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 12.05it/s, loss=0.418]


Epoch 3 Loss: 10.3384
Test Metrics: Precision=0.9016, Recall=0.9016, F1=0.9016

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 310, Split 2...


Map: 100%|██████████| 310/310 [00:00<00:00, 9075.16 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 6647.58 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9465.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 11.07it/s, loss=0.43] 


Epoch 1 Loss: 27.9765
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 11.60it/s, loss=0.427]


Epoch 2 Loss: 14.9636
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 11.13it/s, loss=0.415]


Epoch 3 Loss: 11.7078
Test Metrics: Precision=0.8964, Recall=0.8964, F1=0.8964

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 310, Split 3...


Map: 100%|██████████| 310/310 [00:00<00:00, 8833.17 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 7099.87 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9397.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 11.55it/s, loss=0.36] 


Epoch 1 Loss: 28.7855
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 12.10it/s, loss=0.225]


Epoch 2 Loss: 14.2212
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 11.59it/s, loss=0.311]


Epoch 3 Loss: 10.6113
Test Metrics: Precision=0.8990, Recall=0.8990, F1=0.8990

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 310, Split 4...


Map: 100%|██████████| 310/310 [00:00<00:00, 10105.11 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 5481.83 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9278.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:02<00:00, 13.01it/s, loss=0.523]


Epoch 1 Loss: 26.8202
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 12.57it/s, loss=0.279]


Epoch 2 Loss: 12.4590
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 12.72it/s, loss=0.254]


Epoch 3 Loss: 9.3082
Test Metrics: Precision=0.9025, Recall=0.9025, F1=0.9025

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 310, Split 5...


Map: 100%|██████████| 310/310 [00:00<00:00, 8737.66 examples/s]
Map: 100%|██████████| 62/62 [00:00<00:00, 7544.37 examples/s]
Map: 100%|██████████| 876/876 [00:00<00:00, 9330.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 39/39 [00:03<00:00, 10.66it/s, loss=0.287]


Epoch 1 Loss: 28.1809
Epoch 2/3


Training Epoch 2: 100%|██████████| 39/39 [00:03<00:00, 10.87it/s, loss=0.257]


Epoch 2 Loss: 14.3244
Epoch 3/3


Training Epoch 3: 100%|██████████| 39/39 [00:03<00:00, 10.71it/s, loss=0.257]


Epoch 3 Loss: 10.7994
Test Metrics: Precision=0.9013, Recall=0.9013, F1=0.9013

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 315, Split 1...


Map: 100%|██████████| 315/315 [00:00<00:00, 8876.80 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7585.51 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9438.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.78it/s, loss=0.495]


Epoch 1 Loss: 28.2467
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.93it/s, loss=0.346]


Epoch 2 Loss: 14.6340
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.94it/s, loss=0.325]


Epoch 3 Loss: 11.0748
Test Metrics: Precision=0.8974, Recall=0.8974, F1=0.8974

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 315, Split 2...


Map: 100%|██████████| 315/315 [00:00<00:00, 9044.27 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7649.63 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9318.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.83it/s, loss=0.719]


Epoch 1 Loss: 32.5065
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.52it/s, loss=0.116]


Epoch 2 Loss: 16.5931
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.48it/s, loss=0.207]


Epoch 3 Loss: 12.9474
Test Metrics: Precision=0.8923, Recall=0.8923, F1=0.8923

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 315, Split 3...


Map: 100%|██████████| 315/315 [00:00<00:00, 8912.85 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7271.76 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9323.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.53it/s, loss=0.344]


Epoch 1 Loss: 28.2137
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.67it/s, loss=0.336]


Epoch 2 Loss: 14.5636
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.63it/s, loss=0.14] 


Epoch 3 Loss: 11.1142
Test Metrics: Precision=0.8970, Recall=0.8970, F1=0.8970

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 315, Split 4...


Map: 100%|██████████| 315/315 [00:00<00:00, 9702.69 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 5816.32 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9240.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 12.85it/s, loss=0.508]


Epoch 1 Loss: 28.4319
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 13.03it/s, loss=0.124]


Epoch 2 Loss: 13.7526
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 12.79it/s, loss=0.27] 


Epoch 3 Loss: 10.0520
Test Metrics: Precision=0.8985, Recall=0.8985, F1=0.8985

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 315, Split 5...


Map: 100%|██████████| 315/315 [00:00<00:00, 9023.46 examples/s]
Map: 100%|██████████| 63/63 [00:00<00:00, 7151.12 examples/s]
Map: 100%|██████████| 870/870 [00:00<00:00, 9450.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.15it/s, loss=0.333]


Epoch 1 Loss: 27.9747
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.66it/s, loss=0.34] 


Epoch 2 Loss: 14.6956
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.66it/s, loss=0.11] 


Epoch 3 Loss: 11.0333
Test Metrics: Precision=0.9034, Recall=0.9034, F1=0.9034

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 1...


Map: 100%|██████████| 320/320 [00:00<00:00, 8984.08 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6965.29 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9359.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.75it/s, loss=0.399]


Epoch 1 Loss: 26.3700
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.94it/s, loss=0.186]


Epoch 2 Loss: 12.9450
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.79it/s, loss=0.275] 


Epoch 3 Loss: 9.9451
Test Metrics: Precision=0.9058, Recall=0.9058, F1=0.9058

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 2...


Map: 100%|██████████| 320/320 [00:00<00:00, 9215.66 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7453.64 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9269.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.44it/s, loss=0.479]


Epoch 1 Loss: 29.0670
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.27it/s, loss=0.237]


Epoch 2 Loss: 14.8626
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.15it/s, loss=0.211]


Epoch 3 Loss: 11.8467
Test Metrics: Precision=0.8974, Recall=0.8974, F1=0.8974

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 3...


Map: 100%|██████████| 320/320 [00:00<00:00, 9121.77 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6909.00 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9288.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.19it/s, loss=0.483]


Epoch 1 Loss: 28.6246
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.43it/s, loss=0.319]


Epoch 2 Loss: 13.8620
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.30it/s, loss=0.256]


Epoch 3 Loss: 10.3153
Test Metrics: Precision=0.9035, Recall=0.9035, F1=0.9035

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 4...


Map: 100%|██████████| 320/320 [00:00<00:00, 9872.58 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7013.34 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9176.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 12.22it/s, loss=0.41] 


Epoch 1 Loss: 26.4045
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 12.49it/s, loss=0.323]


Epoch 2 Loss: 12.4992
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 12.95it/s, loss=0.186]


Epoch 3 Loss: 9.1482
Test Metrics: Precision=0.9055, Recall=0.9055, F1=0.9055

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 5...


Map: 100%|██████████| 320/320 [00:00<00:00, 8897.55 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7466.91 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9188.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.12it/s, loss=0.669]


Epoch 1 Loss: 30.1211
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.21it/s, loss=0.315]


Epoch 2 Loss: 15.1917
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.15it/s, loss=0.303]


Epoch 3 Loss: 11.1047
Test Metrics: Precision=0.9064, Recall=0.9064, F1=0.9064

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 325, Split 1...


Map: 100%|██████████| 325/325 [00:00<00:00, 8307.68 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6777.28 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9287.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 41/41 [00:03<00:00, 11.83it/s, loss=0.361]


Epoch 1 Loss: 31.3880
Epoch 2/3


Training Epoch 2: 100%|██████████| 41/41 [00:03<00:00, 11.62it/s, loss=0.389]


Epoch 2 Loss: 14.9433
Epoch 3/3


Training Epoch 3: 100%|██████████| 41/41 [00:03<00:00, 12.25it/s, loss=0.587]


Epoch 3 Loss: 10.9253
Test Metrics: Precision=0.9019, Recall=0.9019, F1=0.9019

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 325, Split 2...


Map: 100%|██████████| 325/325 [00:00<00:00, 9027.77 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6605.52 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9392.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 41/41 [00:03<00:00, 11.38it/s, loss=0.313]


Epoch 1 Loss: 27.3447
Epoch 2/3


Training Epoch 2: 100%|██████████| 41/41 [00:03<00:00, 11.55it/s, loss=0.358]


Epoch 2 Loss: 14.1243
Epoch 3/3


Training Epoch 3: 100%|██████████| 41/41 [00:03<00:00, 11.81it/s, loss=0.168]


Epoch 3 Loss: 10.8798
Test Metrics: Precision=0.8988, Recall=0.8988, F1=0.8988

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 325, Split 3...


Map: 100%|██████████| 325/325 [00:00<00:00, 9001.84 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 6918.13 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9231.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 41/41 [00:03<00:00, 11.42it/s, loss=0.516]


Epoch 1 Loss: 27.1513
Epoch 2/3


Training Epoch 2: 100%|██████████| 41/41 [00:03<00:00, 11.40it/s, loss=0.242]


Epoch 2 Loss: 14.2244
Epoch 3/3


Training Epoch 3: 100%|██████████| 41/41 [00:03<00:00, 11.53it/s, loss=0.17] 


Epoch 3 Loss: 10.4265
Test Metrics: Precision=0.9001, Recall=0.9001, F1=0.9001

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 325, Split 4...


Map: 100%|██████████| 325/325 [00:00<00:00, 9806.83 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 8034.59 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9058.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 41/41 [00:03<00:00, 12.18it/s, loss=0.36] 


Epoch 1 Loss: 26.7454
Epoch 2/3


Training Epoch 2: 100%|██████████| 41/41 [00:03<00:00, 12.54it/s, loss=0.604]


Epoch 2 Loss: 13.4418
Epoch 3/3


Training Epoch 3: 100%|██████████| 41/41 [00:03<00:00, 12.42it/s, loss=0.243]


Epoch 3 Loss: 10.0501
Test Metrics: Precision=0.8999, Recall=0.8999, F1=0.8999

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 325, Split 5...


Map: 100%|██████████| 325/325 [00:00<00:00, 8669.00 examples/s]
Map: 100%|██████████| 65/65 [00:00<00:00, 7124.04 examples/s]
Map: 100%|██████████| 858/858 [00:00<00:00, 9283.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 41/41 [00:03<00:00, 11.05it/s, loss=0.419]


Epoch 1 Loss: 30.0390
Epoch 2/3


Training Epoch 2: 100%|██████████| 41/41 [00:03<00:00, 11.28it/s, loss=0.362]


Epoch 2 Loss: 15.3584
Epoch 3/3


Training Epoch 3: 100%|██████████| 41/41 [00:03<00:00, 10.97it/s, loss=0.249]


Epoch 3 Loss: 11.2704
Test Metrics: Precision=0.9016, Recall=0.9016, F1=0.9016

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 330, Split 1...


Map: 100%|██████████| 330/330 [00:00<00:00, 8518.40 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 6651.87 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9573.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 12.04it/s, loss=0.365]


Epoch 1 Loss: 29.5067
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 11.74it/s, loss=0.313]


Epoch 2 Loss: 15.2664
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.94it/s, loss=0.361]


Epoch 3 Loss: 11.3946
Test Metrics: Precision=0.9009, Recall=0.9009, F1=0.9009

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 330, Split 2...


Map: 100%|██████████| 330/330 [00:00<00:00, 9138.46 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7778.80 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9256.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 11.35it/s, loss=0.0987]


Epoch 1 Loss: 31.0803
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 11.66it/s, loss=0.304]


Epoch 2 Loss: 15.6872
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.42it/s, loss=0.15] 


Epoch 3 Loss: 11.9532
Test Metrics: Precision=0.8947, Recall=0.8947, F1=0.8947

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 330, Split 3...


Map: 100%|██████████| 330/330 [00:00<00:00, 9060.99 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 6787.57 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9550.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 11.72it/s, loss=0.182]


Epoch 1 Loss: 27.8954
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 11.31it/s, loss=0.191]


Epoch 2 Loss: 14.6151
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.09it/s, loss=0.277] 


Epoch 3 Loss: 10.8485
Test Metrics: Precision=0.9040, Recall=0.9040, F1=0.9040

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 330, Split 4...


Map: 100%|██████████| 330/330 [00:00<00:00, 10121.76 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 7159.00 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9122.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 12.43it/s, loss=0.77] 


Epoch 1 Loss: 30.3379
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 12.67it/s, loss=0.285]


Epoch 2 Loss: 13.5490
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 12.41it/s, loss=0.157]


Epoch 3 Loss: 10.7197
Test Metrics: Precision=0.9008, Recall=0.9008, F1=0.9008

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 330, Split 5...


Map: 100%|██████████| 330/330 [00:00<00:00, 8507.40 examples/s]
Map: 100%|██████████| 66/66 [00:00<00:00, 6944.91 examples/s]
Map: 100%|██████████| 852/852 [00:00<00:00, 9259.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 10.92it/s, loss=0.684]


Epoch 1 Loss: 33.4413
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 10.99it/s, loss=0.178]


Epoch 2 Loss: 17.4408
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.10it/s, loss=0.328]


Epoch 3 Loss: 13.1046
Test Metrics: Precision=0.9008, Recall=0.9008, F1=0.9008

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 335, Split 1...


Map: 100%|██████████| 335/335 [00:00<00:00, 8609.31 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 6552.68 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9598.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 11.47it/s, loss=0.338]


Epoch 1 Loss: 29.0214
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 12.06it/s, loss=0.388]


Epoch 2 Loss: 13.6577
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.82it/s, loss=0.284]


Epoch 3 Loss: 10.1934
Test Metrics: Precision=0.9069, Recall=0.9069, F1=0.9069

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 335, Split 2...


Map: 100%|██████████| 335/335 [00:00<00:00, 9265.54 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 7725.80 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9425.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 11.50it/s, loss=0.355]


Epoch 1 Loss: 28.4721
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 11.61it/s, loss=0.218]


Epoch 2 Loss: 14.1112
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.68it/s, loss=0.224]


Epoch 3 Loss: 10.3151
Test Metrics: Precision=0.9015, Recall=0.9015, F1=0.9015

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 335, Split 3...


Map: 100%|██████████| 335/335 [00:00<00:00, 9051.27 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 6587.40 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9442.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 11.05it/s, loss=0.433]


Epoch 1 Loss: 29.6160
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 11.32it/s, loss=0.257]


Epoch 2 Loss: 14.7263
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.37it/s, loss=0.38] 


Epoch 3 Loss: 10.8177
Test Metrics: Precision=0.8994, Recall=0.8994, F1=0.8994

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 335, Split 4...


Map: 100%|██████████| 335/335 [00:00<00:00, 9712.12 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 6845.92 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9297.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 12.36it/s, loss=0.431]


Epoch 1 Loss: 28.0162
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 12.69it/s, loss=0.14] 


Epoch 2 Loss: 14.2463
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 12.35it/s, loss=0.384]


Epoch 3 Loss: 10.1732
Test Metrics: Precision=0.9029, Recall=0.9029, F1=0.9029

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 335, Split 5...


Map: 100%|██████████| 335/335 [00:00<00:00, 8943.88 examples/s]
Map: 100%|██████████| 67/67 [00:00<00:00, 6573.84 examples/s]
Map: 100%|██████████| 846/846 [00:00<00:00, 9476.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 42/42 [00:03<00:00, 11.35it/s, loss=0.365]


Epoch 1 Loss: 30.2602
Epoch 2/3


Training Epoch 2: 100%|██████████| 42/42 [00:03<00:00, 11.05it/s, loss=0.595]


Epoch 2 Loss: 16.5277
Epoch 3/3


Training Epoch 3: 100%|██████████| 42/42 [00:03<00:00, 11.10it/s, loss=0.268]


Epoch 3 Loss: 12.2302
Test Metrics: Precision=0.9006, Recall=0.9006, F1=0.9006

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 1...


Map: 100%|██████████| 340/340 [00:00<00:00, 8843.37 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7497.70 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9413.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.60it/s, loss=0.471]


Epoch 1 Loss: 33.4669
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 12.06it/s, loss=0.298]


Epoch 2 Loss: 16.5097
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.71it/s, loss=0.203]


Epoch 3 Loss: 12.6833
Test Metrics: Precision=0.8990, Recall=0.8990, F1=0.8990

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 2...


Map: 100%|██████████| 340/340 [00:00<00:00, 9063.41 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7867.72 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9237.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.62it/s, loss=0.469]


Epoch 1 Loss: 29.8654
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.54it/s, loss=0.186]


Epoch 2 Loss: 16.3100
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.85it/s, loss=0.21] 


Epoch 3 Loss: 11.7942
Test Metrics: Precision=0.8965, Recall=0.8965, F1=0.8965

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 3...


Map: 100%|██████████| 340/340 [00:00<00:00, 8537.51 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7329.12 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9235.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.05it/s, loss=0.237]


Epoch 1 Loss: 29.0240
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.05it/s, loss=0.267]


Epoch 2 Loss: 14.3701
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.19it/s, loss=0.178]


Epoch 3 Loss: 10.6252
Test Metrics: Precision=0.9031, Recall=0.9031, F1=0.9031

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 4...


Map: 100%|██████████| 340/340 [00:00<00:00, 9521.31 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6438.35 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9078.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 12.04it/s, loss=0.365]


Epoch 1 Loss: 28.1095
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 12.38it/s, loss=0.289]


Epoch 2 Loss: 14.3025
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 12.41it/s, loss=0.0936]


Epoch 3 Loss: 9.9780
Test Metrics: Precision=0.9033, Recall=0.9033, F1=0.9033

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 5...


Map: 100%|██████████| 340/340 [00:00<00:00, 7201.94 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6756.51 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9227.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.40it/s, loss=0.487]


Epoch 1 Loss: 32.0977
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.39it/s, loss=0.387]


Epoch 2 Loss: 16.1041
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.63it/s, loss=0.249]


Epoch 3 Loss: 12.0550
Test Metrics: Precision=0.9036, Recall=0.9036, F1=0.9036

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 345, Split 1...


Map: 100%|██████████| 345/345 [00:00<00:00, 8615.25 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6875.25 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9362.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 11.42it/s, loss=0.513]


Epoch 1 Loss: 33.7649
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.85it/s, loss=0.547]


Epoch 2 Loss: 17.3293
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 11.83it/s, loss=0.0209]


Epoch 3 Loss: 12.1751
Test Metrics: Precision=0.8997, Recall=0.8997, F1=0.8997

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 345, Split 2...


Map: 100%|██████████| 345/345 [00:00<00:00, 8344.06 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6773.56 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 8914.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 11.17it/s, loss=0.23] 


Epoch 1 Loss: 32.4103
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.45it/s, loss=0.285]


Epoch 2 Loss: 15.7471
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 11.77it/s, loss=0.221]


Epoch 3 Loss: 12.0050
Test Metrics: Precision=0.9005, Recall=0.9005, F1=0.9005

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 345, Split 3...


Map: 100%|██████████| 345/345 [00:00<00:00, 8725.65 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 7622.39 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 9223.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 11.58it/s, loss=0.181]


Epoch 1 Loss: 30.7783
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.45it/s, loss=0.402]


Epoch 2 Loss: 14.8164
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 11.57it/s, loss=0.279]


Epoch 3 Loss: 11.1030
Test Metrics: Precision=0.9038, Recall=0.9038, F1=0.9038

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 345, Split 4...


Map: 100%|██████████| 345/345 [00:00<00:00, 8966.30 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 6687.01 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 8356.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 12.04it/s, loss=0.233]


Epoch 1 Loss: 28.9298
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 12.61it/s, loss=0.423]


Epoch 2 Loss: 14.0412
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 11.92it/s, loss=0.0491]


Epoch 3 Loss: 9.6208
Test Metrics: Precision=0.9076, Recall=0.9076, F1=0.9076

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 345, Split 5...


Map: 100%|██████████| 345/345 [00:00<00:00, 8630.20 examples/s]
Map: 100%|██████████| 69/69 [00:00<00:00, 7267.33 examples/s]
Map: 100%|██████████| 834/834 [00:00<00:00, 8676.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 11.29it/s, loss=0.257]


Epoch 1 Loss: 29.6706
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.06it/s, loss=0.466]


Epoch 2 Loss: 15.3747
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:04<00:00, 10.79it/s, loss=0.282]


Epoch 3 Loss: 11.4680
Test Metrics: Precision=0.9082, Recall=0.9082, F1=0.9082

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 350, Split 1...


Map: 100%|██████████| 350/350 [00:00<00:00, 8720.59 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 6787.21 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9498.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 11.61it/s, loss=0.436]


Epoch 1 Loss: 30.0735
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.56it/s, loss=0.467]


Epoch 2 Loss: 14.8053
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 12.09it/s, loss=0.23] 


Epoch 3 Loss: 11.1925
Test Metrics: Precision=0.9056, Recall=0.9056, F1=0.9056

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 350, Split 2...


Map: 100%|██████████| 350/350 [00:00<00:00, 8931.00 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7152.11 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9319.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 11.94it/s, loss=0.483]


Epoch 1 Loss: 32.6053
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.74it/s, loss=0.315]


Epoch 2 Loss: 16.0626
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 12.02it/s, loss=0.232]


Epoch 3 Loss: 12.1016
Test Metrics: Precision=0.9022, Recall=0.9022, F1=0.9022

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 350, Split 3...


Map: 100%|██████████| 350/350 [00:00<00:00, 8352.24 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 7469.44 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9035.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:04<00:00, 10.76it/s, loss=0.279]


Epoch 1 Loss: 29.9427
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 11.36it/s, loss=0.237]


Epoch 2 Loss: 14.3772
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 11.05it/s, loss=0.266]


Epoch 3 Loss: 10.7332
Test Metrics: Precision=0.9036, Recall=0.9036, F1=0.9036

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 350, Split 4...


Map: 100%|██████████| 350/350 [00:00<00:00, 9417.36 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 5866.16 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 8795.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:03<00:00, 12.32it/s, loss=0.439]


Epoch 1 Loss: 29.1054
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:03<00:00, 12.30it/s, loss=0.418]


Epoch 2 Loss: 14.5413
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 12.24it/s, loss=0.305]


Epoch 3 Loss: 10.6864
Test Metrics: Precision=0.9020, Recall=0.9020, F1=0.9020

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 350, Split 5...


Map: 100%|██████████| 350/350 [00:00<00:00, 8468.40 examples/s]
Map: 100%|██████████| 70/70 [00:00<00:00, 6316.59 examples/s]
Map: 100%|██████████| 828/828 [00:00<00:00, 9328.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 44/44 [00:04<00:00, 10.80it/s, loss=0.45] 


Epoch 1 Loss: 33.0850
Epoch 2/3


Training Epoch 2: 100%|██████████| 44/44 [00:04<00:00, 10.87it/s, loss=0.401]


Epoch 2 Loss: 15.2629
Epoch 3/3


Training Epoch 3: 100%|██████████| 44/44 [00:03<00:00, 11.13it/s, loss=0.434]


Epoch 3 Loss: 11.6676
Test Metrics: Precision=0.9052, Recall=0.9052, F1=0.9052

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 355, Split 1...


Map: 100%|██████████| 355/355 [00:00<00:00, 8584.38 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6990.51 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9231.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 12.02it/s, loss=0.324]


Epoch 1 Loss: 30.8111
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.76it/s, loss=0.185]


Epoch 2 Loss: 15.0084
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.86it/s, loss=0.262]


Epoch 3 Loss: 11.2423
Test Metrics: Precision=0.9093, Recall=0.9093, F1=0.9093

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 355, Split 2...


Map: 100%|██████████| 355/355 [00:00<00:00, 8565.17 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6334.86 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9046.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.15it/s, loss=0.529]


Epoch 1 Loss: 31.9504
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.72it/s, loss=0.0325]


Epoch 2 Loss: 14.6627
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 12.14it/s, loss=0.312]


Epoch 3 Loss: 11.1914
Test Metrics: Precision=0.9040, Recall=0.9040, F1=0.9040

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 355, Split 3...


Map: 100%|██████████| 355/355 [00:00<00:00, 8373.61 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 8201.02 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9215.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.06it/s, loss=0.785]


Epoch 1 Loss: 31.6331
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.46it/s, loss=0.306]


Epoch 2 Loss: 15.1740
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.26it/s, loss=0.0607]


Epoch 3 Loss: 11.2464
Test Metrics: Precision=0.9026, Recall=0.9026, F1=0.9026

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 355, Split 4...


Map: 100%|██████████| 355/355 [00:00<00:00, 9275.62 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 6499.82 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 8749.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 12.22it/s, loss=0.377]


Epoch 1 Loss: 30.8667
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 12.83it/s, loss=0.644]


Epoch 2 Loss: 15.0013
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 12.14it/s, loss=0.369]


Epoch 3 Loss: 11.2051
Test Metrics: Precision=0.9014, Recall=0.9014, F1=0.9014

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 355, Split 5...


Map: 100%|██████████| 355/355 [00:00<00:00, 8923.09 examples/s]
Map: 100%|██████████| 71/71 [00:00<00:00, 7311.46 examples/s]
Map: 100%|██████████| 822/822 [00:00<00:00, 9372.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.12it/s, loss=0.663]


Epoch 1 Loss: 33.2010
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:04<00:00, 11.11it/s, loss=0.219]


Epoch 2 Loss: 16.4343
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 10.81it/s, loss=0.193]


Epoch 3 Loss: 12.2040
Test Metrics: Precision=0.9058, Recall=0.9058, F1=0.9058

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 8963.62 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7195.55 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9270.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 11.68it/s, loss=0.33] 


Epoch 1 Loss: 31.7061
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.30it/s, loss=0.28] 


Epoch 2 Loss: 15.3092
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.55it/s, loss=0.256]


Epoch 3 Loss: 11.6691
Test Metrics: Precision=0.9048, Recall=0.9048, F1=0.9048

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 9125.16 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 5630.04 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9146.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 11.55it/s, loss=0.387]


Epoch 1 Loss: 34.0818
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.42it/s, loss=0.351]


Epoch 2 Loss: 16.4353
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.81it/s, loss=0.257]


Epoch 3 Loss: 12.0607
Test Metrics: Precision=0.9014, Recall=0.9014, F1=0.9014

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 8678.27 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8020.98 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9471.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 10.94it/s, loss=0.449]


Epoch 1 Loss: 29.5555
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.28it/s, loss=0.223]


Epoch 2 Loss: 14.5893
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 10.78it/s, loss=0.294]


Epoch 3 Loss: 10.4848
Test Metrics: Precision=0.9061, Recall=0.9061, F1=0.9061

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 9135.37 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 5939.89 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 8751.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 12.22it/s, loss=0.303]


Epoch 1 Loss: 30.3572
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 12.24it/s, loss=0.24] 


Epoch 2 Loss: 15.1695
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 12.11it/s, loss=0.237]


Epoch 3 Loss: 10.9815
Test Metrics: Precision=0.9046, Recall=0.9046, F1=0.9046

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 8498.28 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6357.82 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9057.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.23it/s, loss=0.633]


Epoch 1 Loss: 31.3474
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:04<00:00, 10.82it/s, loss=0.204]


Epoch 2 Loss: 16.5695
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 10.95it/s, loss=0.48] 


Epoch 3 Loss: 12.4401
Test Metrics: Precision=0.9036, Recall=0.9036, F1=0.9036

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 365, Split 1...


Map: 100%|██████████| 365/365 [00:00<00:00, 8556.07 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 7308.89 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9092.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 46/46 [00:03<00:00, 11.74it/s, loss=0.432]


Epoch 1 Loss: 30.7006
Epoch 2/3


Training Epoch 2: 100%|██████████| 46/46 [00:03<00:00, 11.80it/s, loss=0.0852]


Epoch 2 Loss: 15.1148
Epoch 3/3


Training Epoch 3: 100%|██████████| 46/46 [00:03<00:00, 11.61it/s, loss=0.293] 


Epoch 3 Loss: 11.1937
Test Metrics: Precision=0.9095, Recall=0.9095, F1=0.9095

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 365, Split 2...


Map: 100%|██████████| 365/365 [00:00<00:00, 8952.96 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 8262.97 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 8989.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 46/46 [00:03<00:00, 11.58it/s, loss=0.291]


Epoch 1 Loss: 30.0271
Epoch 2/3


Training Epoch 2: 100%|██████████| 46/46 [00:04<00:00, 11.26it/s, loss=0.328]


Epoch 2 Loss: 15.5773
Epoch 3/3


Training Epoch 3: 100%|██████████| 46/46 [00:03<00:00, 12.02it/s, loss=0.152] 


Epoch 3 Loss: 11.6214
Test Metrics: Precision=0.9053, Recall=0.9053, F1=0.9053

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 365, Split 3...


Map: 100%|██████████| 365/365 [00:00<00:00, 8229.70 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 7073.84 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 9045.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 46/46 [00:04<00:00, 10.98it/s, loss=0.355]


Epoch 1 Loss: 30.5522
Epoch 2/3


Training Epoch 2: 100%|██████████| 46/46 [00:04<00:00, 11.04it/s, loss=0.318]


Epoch 2 Loss: 16.1186
Epoch 3/3


Training Epoch 3: 100%|██████████| 46/46 [00:04<00:00, 11.25it/s, loss=0.22] 


Epoch 3 Loss: 12.1815
Test Metrics: Precision=0.9032, Recall=0.9032, F1=0.9032

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 365, Split 4...


Map: 100%|██████████| 365/365 [00:00<00:00, 9556.73 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 6689.63 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 8724.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 46/46 [00:03<00:00, 12.02it/s, loss=0.39] 


Epoch 1 Loss: 29.4108
Epoch 2/3


Training Epoch 2: 100%|██████████| 46/46 [00:03<00:00, 12.26it/s, loss=0.279]


Epoch 2 Loss: 14.2233
Epoch 3/3


Training Epoch 3: 100%|██████████| 46/46 [00:03<00:00, 12.46it/s, loss=0.279] 


Epoch 3 Loss: 10.2701
Test Metrics: Precision=0.9054, Recall=0.9054, F1=0.9054

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 365, Split 5...


Map: 100%|██████████| 365/365 [00:00<00:00, 8510.74 examples/s]
Map: 100%|██████████| 73/73 [00:00<00:00, 7819.20 examples/s]
Map: 100%|██████████| 810/810 [00:00<00:00, 8733.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 46/46 [00:04<00:00, 10.93it/s, loss=0.458]


Epoch 1 Loss: 32.8489
Epoch 2/3


Training Epoch 2: 100%|██████████| 46/46 [00:04<00:00, 11.07it/s, loss=0.219]


Epoch 2 Loss: 17.2741
Epoch 3/3


Training Epoch 3: 100%|██████████| 46/46 [00:04<00:00, 11.19it/s, loss=0.321]


Epoch 3 Loss: 12.7792
Test Metrics: Precision=0.9010, Recall=0.9010, F1=0.9010

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 370, Split 1...


Map: 100%|██████████| 370/370 [00:00<00:00, 8382.22 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 7300.96 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9166.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:03<00:00, 11.85it/s, loss=0.184]


Epoch 1 Loss: 33.2046
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:03<00:00, 11.87it/s, loss=0.306]


Epoch 2 Loss: 16.2228
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:03<00:00, 11.86it/s, loss=0.181] 


Epoch 3 Loss: 11.7360
Test Metrics: Precision=0.9058, Recall=0.9058, F1=0.9058

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 370, Split 2...


Map: 100%|██████████| 370/370 [00:00<00:00, 8803.96 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 7270.18 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 8865.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 11.34it/s, loss=0.399]


Epoch 1 Loss: 31.5948
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:03<00:00, 11.77it/s, loss=0.0765]


Epoch 2 Loss: 15.7545
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:03<00:00, 11.78it/s, loss=0.485]


Epoch 3 Loss: 12.3139
Test Metrics: Precision=0.9008, Recall=0.9008, F1=0.9008

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 370, Split 3...


Map: 100%|██████████| 370/370 [00:00<00:00, 8251.59 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 7961.69 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9381.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 10.72it/s, loss=0.482]


Epoch 1 Loss: 33.9596
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:04<00:00, 11.43it/s, loss=0.239]


Epoch 2 Loss: 16.4451
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:04<00:00, 10.87it/s, loss=0.736] 


Epoch 3 Loss: 12.5607
Test Metrics: Precision=0.9046, Recall=0.9046, F1=0.9046

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 370, Split 4...


Map: 100%|██████████| 370/370 [00:00<00:00, 9015.50 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 6811.33 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9025.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 11.67it/s, loss=0.552]


Epoch 1 Loss: 31.7124
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:03<00:00, 12.07it/s, loss=0.227]


Epoch 2 Loss: 14.8432
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:03<00:00, 12.11it/s, loss=0.101]


Epoch 3 Loss: 10.9346
Test Metrics: Precision=0.9018, Recall=0.9018, F1=0.9018

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 370, Split 5...


Map: 100%|██████████| 370/370 [00:00<00:00, 9374.84 examples/s]
Map: 100%|██████████| 74/74 [00:00<00:00, 7829.93 examples/s]
Map: 100%|██████████| 804/804 [00:00<00:00, 9363.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 11.14it/s, loss=0.625]


Epoch 1 Loss: 33.2383
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:04<00:00, 11.13it/s, loss=0.402]


Epoch 2 Loss: 16.5737
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:04<00:00, 11.25it/s, loss=0.105]


Epoch 3 Loss: 12.6071
Test Metrics: Precision=0.9070, Recall=0.9070, F1=0.9070

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 375, Split 1...


Map: 100%|██████████| 375/375 [00:00<00:00, 8312.97 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7971.13 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9220.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:03<00:00, 11.93it/s, loss=0.58] 


Epoch 1 Loss: 32.5014
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:03<00:00, 11.97it/s, loss=0.407]


Epoch 2 Loss: 15.9686
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:04<00:00, 11.65it/s, loss=0.327]


Epoch 3 Loss: 11.6436
Test Metrics: Precision=0.9062, Recall=0.9062, F1=0.9062

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 375, Split 2...


Map: 100%|██████████| 375/375 [00:00<00:00, 9200.13 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6945.90 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9236.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 11.63it/s, loss=0.547]


Epoch 1 Loss: 31.5814
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:04<00:00, 11.41it/s, loss=0.317]


Epoch 2 Loss: 16.1546
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:04<00:00, 11.64it/s, loss=0.199]


Epoch 3 Loss: 12.2338
Test Metrics: Precision=0.9024, Recall=0.9024, F1=0.9024

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 375, Split 3...


Map: 100%|██████████| 375/375 [00:00<00:00, 8525.89 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 7438.82 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9161.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 10.73it/s, loss=0.278]


Epoch 1 Loss: 31.2966
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:04<00:00, 10.81it/s, loss=0.343]


Epoch 2 Loss: 16.0118
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:04<00:00, 10.79it/s, loss=0.284]


Epoch 3 Loss: 12.0863
Test Metrics: Precision=0.9075, Recall=0.9075, F1=0.9075

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 375, Split 4...


Map: 100%|██████████| 375/375 [00:00<00:00, 9422.80 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6705.16 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9000.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 11.68it/s, loss=0.413]


Epoch 1 Loss: 33.5045
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:03<00:00, 11.85it/s, loss=0.462]


Epoch 2 Loss: 16.3830
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:03<00:00, 12.47it/s, loss=0.193]


Epoch 3 Loss: 12.0957
Test Metrics: Precision=0.8971, Recall=0.8971, F1=0.8971

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 375, Split 5...


Map: 100%|██████████| 375/375 [00:00<00:00, 9056.57 examples/s]
Map: 100%|██████████| 75/75 [00:00<00:00, 6723.80 examples/s]
Map: 100%|██████████| 798/798 [00:00<00:00, 9246.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 47/47 [00:04<00:00, 11.03it/s, loss=0.544]


Epoch 1 Loss: 32.7184
Epoch 2/3


Training Epoch 2: 100%|██████████| 47/47 [00:04<00:00, 11.20it/s, loss=0.228]


Epoch 2 Loss: 16.9092
Epoch 3/3


Training Epoch 3: 100%|██████████| 47/47 [00:04<00:00, 11.24it/s, loss=0.186]


Epoch 3 Loss: 12.8157
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 8669.64 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7050.81 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9136.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.76it/s, loss=0.501]


Epoch 1 Loss: 32.3275
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.92it/s, loss=0.212]


Epoch 2 Loss: 16.4457
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.75it/s, loss=0.227]


Epoch 3 Loss: 11.7944
Test Metrics: Precision=0.9064, Recall=0.9064, F1=0.9064

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 9017.30 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6250.09 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9026.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.81it/s, loss=0.347]


Epoch 1 Loss: 32.9638
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.38it/s, loss=0.151]


Epoch 2 Loss: 17.0337
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.21it/s, loss=0.419]


Epoch 3 Loss: 13.1160
Test Metrics: Precision=0.9036, Recall=0.9036, F1=0.9036

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 8775.56 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7513.13 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9308.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 10.74it/s, loss=0.476]


Epoch 1 Loss: 33.7443
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 10.74it/s, loss=0.319]


Epoch 2 Loss: 16.8131
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 10.91it/s, loss=0.367]


Epoch 3 Loss: 12.7410
Test Metrics: Precision=0.9019, Recall=0.9019, F1=0.9019

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 9667.87 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6968.50 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 8975.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.85it/s, loss=0.498]


Epoch 1 Loss: 30.9328
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.94it/s, loss=0.252]


Epoch 2 Loss: 15.5899
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.62it/s, loss=0.297] 


Epoch 3 Loss: 11.7843
Test Metrics: Precision=0.9048, Recall=0.9048, F1=0.9048

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 9090.90 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8118.15 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9298.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 10.93it/s, loss=0.437]


Epoch 1 Loss: 35.0863
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 10.94it/s, loss=0.359]


Epoch 2 Loss: 18.0527
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.14it/s, loss=0.621]


Epoch 3 Loss: 13.5360
Test Metrics: Precision=0.9078, Recall=0.9078, F1=0.9078

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 385, Split 1...


Map: 100%|██████████| 385/385 [00:00<00:00, 8646.20 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 6834.00 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9232.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 12.18it/s, loss=0.272]


Epoch 1 Loss: 34.1031
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 12.00it/s, loss=0.302]


Epoch 2 Loss: 16.5258
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.91it/s, loss=0.00643]


Epoch 3 Loss: 12.8455
Test Metrics: Precision=0.9052, Recall=0.9052, F1=0.9052

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 385, Split 2...


Map: 100%|██████████| 385/385 [00:00<00:00, 9438.68 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7267.52 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9458.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.35it/s, loss=1.62] 


Epoch 1 Loss: 31.9177
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.60it/s, loss=0.303]


Epoch 2 Loss: 17.4446
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.61it/s, loss=0.231]


Epoch 3 Loss: 13.1847
Test Metrics: Precision=0.9006, Recall=0.9006, F1=0.9006

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 385, Split 3...


Map: 100%|██████████| 385/385 [00:00<00:00, 8615.75 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 8493.84 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9409.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 10.73it/s, loss=0.477]


Epoch 1 Loss: 36.5606
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.03it/s, loss=0.424]


Epoch 2 Loss: 17.6449
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 10.97it/s, loss=0.272]


Epoch 3 Loss: 13.5483
Test Metrics: Precision=0.9019, Recall=0.9019, F1=0.9019

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 385, Split 4...


Map: 100%|██████████| 385/385 [00:00<00:00, 9654.65 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7350.89 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9104.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.79it/s, loss=0.141]


Epoch 1 Loss: 31.7000
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.72it/s, loss=0.311]


Epoch 2 Loss: 14.9030
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.85it/s, loss=0.111] 


Epoch 3 Loss: 10.5817
Test Metrics: Precision=0.9030, Recall=0.9030, F1=0.9030

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 385, Split 5...


Map: 100%|██████████| 385/385 [00:00<00:00, 8642.36 examples/s]
Map: 100%|██████████| 77/77 [00:00<00:00, 7841.91 examples/s]
Map: 100%|██████████| 786/786 [00:00<00:00, 9136.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.24it/s, loss=0.46] 


Epoch 1 Loss: 33.8310
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 10.86it/s, loss=0.377]


Epoch 2 Loss: 18.2445
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 10.91it/s, loss=0.126]


Epoch 3 Loss: 13.1342
Test Metrics: Precision=0.9061, Recall=0.9061, F1=0.9061

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 390, Split 1...


Map: 100%|██████████| 390/390 [00:00<00:00, 9119.88 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 6408.28 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9664.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.61it/s, loss=0.394]


Epoch 1 Loss: 31.1093
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.58it/s, loss=0.299]


Epoch 2 Loss: 15.5678
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.56it/s, loss=0.228]


Epoch 3 Loss: 11.2747
Test Metrics: Precision=0.9071, Recall=0.9071, F1=0.9071

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 390, Split 2...


Map: 100%|██████████| 390/390 [00:00<00:00, 9169.62 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7382.01 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9408.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.48it/s, loss=0.367]


Epoch 1 Loss: 33.4885
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.35it/s, loss=0.243]


Epoch 2 Loss: 16.6422
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.51it/s, loss=0.31] 


Epoch 3 Loss: 12.7230
Test Metrics: Precision=0.9042, Recall=0.9042, F1=0.9042

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 390, Split 3...


Map: 100%|██████████| 390/390 [00:00<00:00, 8741.73 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7957.09 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9526.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.04it/s, loss=0.332]


Epoch 1 Loss: 30.6525
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 10.79it/s, loss=0.399]


Epoch 2 Loss: 16.2372
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 10.97it/s, loss=0.347]


Epoch 3 Loss: 11.9059
Test Metrics: Precision=0.9083, Recall=0.9083, F1=0.9083

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 390, Split 4...


Map: 100%|██████████| 390/390 [00:00<00:00, 9642.87 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7100.04 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 2855.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.83it/s, loss=0.445]


Epoch 1 Loss: 32.0882
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.57it/s, loss=0.227]


Epoch 2 Loss: 14.6574
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.74it/s, loss=0.194] 


Epoch 3 Loss: 10.6423
Test Metrics: Precision=0.9063, Recall=0.9063, F1=0.9063

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 390, Split 5...


Map: 100%|██████████| 390/390 [00:00<00:00, 9069.27 examples/s]
Map: 100%|██████████| 78/78 [00:00<00:00, 7010.13 examples/s]
Map: 100%|██████████| 780/780 [00:00<00:00, 9444.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 49/49 [00:04<00:00, 11.09it/s, loss=0.344]


Epoch 1 Loss: 34.2975
Epoch 2/3


Training Epoch 2: 100%|██████████| 49/49 [00:04<00:00, 11.16it/s, loss=0.197]


Epoch 2 Loss: 17.3040
Epoch 3/3


Training Epoch 3: 100%|██████████| 49/49 [00:04<00:00, 11.23it/s, loss=0.328]


Epoch 3 Loss: 12.3753
Test Metrics: Precision=0.9100, Recall=0.9100, F1=0.9100

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 395, Split 1...


Map: 100%|██████████| 395/395 [00:00<00:00, 8941.29 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 6497.95 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9426.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.65it/s, loss=0.608]


Epoch 1 Loss: 34.5781
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.77it/s, loss=0.216]


Epoch 2 Loss: 16.5995
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.78it/s, loss=0.331] 


Epoch 3 Loss: 12.3706
Test Metrics: Precision=0.9058, Recall=0.9058, F1=0.9058

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 395, Split 2...


Map: 100%|██████████| 395/395 [00:00<00:00, 8914.11 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7422.38 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9276.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.11it/s, loss=0.506]


Epoch 1 Loss: 32.0995
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 12.24it/s, loss=0.27] 


Epoch 2 Loss: 17.6391
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.74it/s, loss=0.261]


Epoch 3 Loss: 12.7614
Test Metrics: Precision=0.9028, Recall=0.9028, F1=0.9028

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 395, Split 3...


Map: 100%|██████████| 395/395 [00:00<00:00, 8591.14 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7710.30 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9272.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.00it/s, loss=0.339]


Epoch 1 Loss: 32.8236
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.00it/s, loss=0.296]


Epoch 2 Loss: 16.6462
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.94it/s, loss=0.209]


Epoch 3 Loss: 12.2590
Test Metrics: Precision=0.9079, Recall=0.9079, F1=0.9079

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 395, Split 4...


Map: 100%|██████████| 395/395 [00:00<00:00, 9454.77 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 6751.64 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9260.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 12.01it/s, loss=0.655]


Epoch 1 Loss: 34.2065
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.71it/s, loss=0.09] 


Epoch 2 Loss: 15.4323
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.58it/s, loss=0.092]


Epoch 3 Loss: 11.9233
Test Metrics: Precision=0.9055, Recall=0.9055, F1=0.9055

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 395, Split 5...


Map: 100%|██████████| 395/395 [00:00<00:00, 9005.94 examples/s]
Map: 100%|██████████| 79/79 [00:00<00:00, 7893.23 examples/s]
Map: 100%|██████████| 774/774 [00:00<00:00, 9163.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.18it/s, loss=0.163]


Epoch 1 Loss: 37.1027
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.09it/s, loss=0.196]


Epoch 2 Loss: 18.0591
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.98it/s, loss=0.361]


Epoch 3 Loss: 13.3630
Test Metrics: Precision=0.9077, Recall=0.9077, F1=0.9077

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 8727.13 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7258.15 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9403.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.78it/s, loss=0.449]


Epoch 1 Loss: 33.6256
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.71it/s, loss=0.232]


Epoch 2 Loss: 15.9022
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.92it/s, loss=0.245] 


Epoch 3 Loss: 11.5766
Test Metrics: Precision=0.9078, Recall=0.9078, F1=0.9078

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 9031.32 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7775.33 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9317.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.35it/s, loss=0.386]


Epoch 1 Loss: 33.1959
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.52it/s, loss=0.352]


Epoch 2 Loss: 16.1387
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.11it/s, loss=0.346]


Epoch 3 Loss: 12.1084
Test Metrics: Precision=0.9061, Recall=0.9061, F1=0.9061

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8535.20 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6909.18 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9471.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 10.97it/s, loss=0.432]


Epoch 1 Loss: 34.9717
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 10.89it/s, loss=0.409]


Epoch 2 Loss: 17.4881
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.90it/s, loss=0.115]


Epoch 3 Loss: 13.0451
Test Metrics: Precision=0.9064, Recall=0.9064, F1=0.9064

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 8239.07 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6261.79 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 8869.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.96it/s, loss=0.482]


Epoch 1 Loss: 31.9893
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.64it/s, loss=0.392]


Epoch 2 Loss: 15.9894
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.79it/s, loss=0.265]


Epoch 3 Loss: 11.8559
Test Metrics: Precision=0.9059, Recall=0.9059, F1=0.9059

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 8897.50 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7215.08 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9372.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.15it/s, loss=0.562]


Epoch 1 Loss: 35.1703
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 10.79it/s, loss=0.254]


Epoch 2 Loss: 17.1320
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.77it/s, loss=0.313]


Epoch 3 Loss: 12.8075
Test Metrics: Precision=0.9069, Recall=0.9069, F1=0.9069

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 405, Split 1...


Map: 100%|██████████| 405/405 [00:00<00:00, 8918.19 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7355.08 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9286.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 11.86it/s, loss=0.373]


Epoch 1 Loss: 33.8854
Epoch 2/3


Training Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 11.84it/s, loss=0.283]


Epoch 2 Loss: 17.0571
Epoch 3/3


Training Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 12.01it/s, loss=0.23] 


Epoch 3 Loss: 12.2352
Test Metrics: Precision=0.9087, Recall=0.9087, F1=0.9087

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 405, Split 2...


Map: 100%|██████████| 405/405 [00:00<00:00, 8770.98 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7400.75 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9315.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 11.79it/s, loss=0.45] 


Epoch 1 Loss: 33.8192
Epoch 2/3


Training Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 11.74it/s, loss=0.21] 


Epoch 2 Loss: 16.7689
Epoch 3/3


Training Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 11.76it/s, loss=0.308]


Epoch 3 Loss: 12.4175
Test Metrics: Precision=0.9042, Recall=0.9042, F1=0.9042

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 405, Split 3...


Map: 100%|██████████| 405/405 [00:00<00:00, 8571.90 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 6912.84 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9370.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 11.02it/s, loss=0.414]


Epoch 1 Loss: 33.4797
Epoch 2/3


Training Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 10.93it/s, loss=0.277]


Epoch 2 Loss: 17.2229
Epoch 3/3


Training Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 10.92it/s, loss=0.21] 


Epoch 3 Loss: 12.6162
Test Metrics: Precision=0.9097, Recall=0.9097, F1=0.9097

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 405, Split 4...


Map: 100%|██████████| 405/405 [00:00<00:00, 9090.19 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 8536.36 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9015.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 11.61it/s, loss=0.404]


Epoch 1 Loss: 29.7658
Epoch 2/3


Training Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 11.51it/s, loss=0.276]


Epoch 2 Loss: 15.0760
Epoch 3/3


Training Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 11.61it/s, loss=0.275] 


Epoch 3 Loss: 10.7055
Test Metrics: Precision=0.9093, Recall=0.9093, F1=0.9093

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 405, Split 5...


Map: 100%|██████████| 405/405 [00:00<00:00, 9355.17 examples/s]
Map: 100%|██████████| 81/81 [00:00<00:00, 7014.47 examples/s]
Map: 100%|██████████| 762/762 [00:00<00:00, 9457.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 11.24it/s, loss=0.366]


Epoch 1 Loss: 34.0740
Epoch 2/3


Training Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 11.15it/s, loss=0.308]


Epoch 2 Loss: 18.4774
Epoch 3/3


Training Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 11.07it/s, loss=0.682]


Epoch 3 Loss: 14.2020
Test Metrics: Precision=0.9086, Recall=0.9086, F1=0.9086

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 410, Split 1...


Map: 100%|██████████| 410/410 [00:00<00:00, 8788.87 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 8084.55 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9340.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 12.02it/s, loss=0.531]


Epoch 1 Loss: 33.9318
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.65it/s, loss=0.419]


Epoch 2 Loss: 16.1295
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 12.02it/s, loss=0.304] 


Epoch 3 Loss: 11.8885
Test Metrics: Precision=0.9091, Recall=0.9091, F1=0.9091

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 410, Split 2...


Map: 100%|██████████| 410/410 [00:00<00:00, 9327.60 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7241.61 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9299.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.22it/s, loss=0.405]


Epoch 1 Loss: 36.1178
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.92it/s, loss=0.394]


Epoch 2 Loss: 18.6049
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.86it/s, loss=0.0669]


Epoch 3 Loss: 13.6138
Test Metrics: Precision=0.9039, Recall=0.9039, F1=0.9039

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 410, Split 3...


Map: 100%|██████████| 410/410 [00:00<00:00, 8915.17 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 6286.59 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9687.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 10.95it/s, loss=0.615]


Epoch 1 Loss: 35.4244
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.00it/s, loss=0.278]


Epoch 2 Loss: 18.6900
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.14it/s, loss=0.187]


Epoch 3 Loss: 13.7039
Test Metrics: Precision=0.9032, Recall=0.9032, F1=0.9032

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 410, Split 4...


Map: 100%|██████████| 410/410 [00:00<00:00, 9406.38 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 6574.02 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 8634.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.57it/s, loss=0.268]


Epoch 1 Loss: 32.4790
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.65it/s, loss=0.38] 


Epoch 2 Loss: 16.5471
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.99it/s, loss=0.436]


Epoch 3 Loss: 12.0453
Test Metrics: Precision=0.9050, Recall=0.9050, F1=0.9050

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 410, Split 5...


Map: 100%|██████████| 410/410 [00:00<00:00, 8707.07 examples/s]
Map: 100%|██████████| 82/82 [00:00<00:00, 7442.99 examples/s]
Map: 100%|██████████| 756/756 [00:00<00:00, 9222.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.24it/s, loss=0.276]


Epoch 1 Loss: 33.1437
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 10.97it/s, loss=0.244]


Epoch 2 Loss: 17.9277
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.00it/s, loss=0.191]


Epoch 3 Loss: 13.3476
Test Metrics: Precision=0.9125, Recall=0.9125, F1=0.9125

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 415, Split 1...


Map: 100%|██████████| 415/415 [00:00<00:00, 8923.09 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7125.87 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9449.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.87it/s, loss=0.416]


Epoch 1 Loss: 33.3630
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.76it/s, loss=0.352]


Epoch 2 Loss: 16.2505
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.96it/s, loss=0.132]


Epoch 3 Loss: 12.0500
Test Metrics: Precision=0.9128, Recall=0.9128, F1=0.9128

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 415, Split 2...


Map: 100%|██████████| 415/415 [00:00<00:00, 8841.28 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 6716.84 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9269.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.50it/s, loss=0.378]


Epoch 1 Loss: 33.9412
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.44it/s, loss=0.211]


Epoch 2 Loss: 16.2500
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.80it/s, loss=0.197]


Epoch 3 Loss: 12.1372
Test Metrics: Precision=0.9093, Recall=0.9093, F1=0.9093

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 415, Split 3...


Map: 100%|██████████| 415/415 [00:00<00:00, 8479.86 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7091.03 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9647.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.05it/s, loss=0.385]


Epoch 1 Loss: 34.2645
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 10.66it/s, loss=0.831]


Epoch 2 Loss: 16.5960
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.05it/s, loss=0.237]


Epoch 3 Loss: 12.2365
Test Metrics: Precision=0.9135, Recall=0.9135, F1=0.9135

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 415, Split 4...


Map: 100%|██████████| 415/415 [00:00<00:00, 9275.38 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7243.75 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9096.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 11.56it/s, loss=0.301]


Epoch 1 Loss: 33.0968
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.73it/s, loss=0.412]


Epoch 2 Loss: 16.2576
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.55it/s, loss=0.291] 


Epoch 3 Loss: 11.4947
Test Metrics: Precision=0.9099, Recall=0.9099, F1=0.9099

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 415, Split 5...


Map: 100%|██████████| 415/415 [00:00<00:00, 9017.30 examples/s]
Map: 100%|██████████| 83/83 [00:00<00:00, 7799.95 examples/s]
Map: 100%|██████████| 750/750 [00:00<00:00, 9361.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 52/52 [00:04<00:00, 10.91it/s, loss=0.47] 


Epoch 1 Loss: 34.9531
Epoch 2/3


Training Epoch 2: 100%|██████████| 52/52 [00:04<00:00, 11.11it/s, loss=0.236]


Epoch 2 Loss: 17.5575
Epoch 3/3


Training Epoch 3: 100%|██████████| 52/52 [00:04<00:00, 11.24it/s, loss=0.341]


Epoch 3 Loss: 13.3968
Test Metrics: Precision=0.9114, Recall=0.9114, F1=0.9114

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 8744.86 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8605.59 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 8395.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.77it/s, loss=0.199]


Epoch 1 Loss: 35.8168
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.87it/s, loss=0.284]


Epoch 2 Loss: 16.8532
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 12.08it/s, loss=0.0794]


Epoch 3 Loss: 12.0627
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 8733.71 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7431.53 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 2721.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.56it/s, loss=0.353]


Epoch 1 Loss: 35.1048
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.73it/s, loss=0.279]


Epoch 2 Loss: 17.6908
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.56it/s, loss=0.369]


Epoch 3 Loss: 13.7662
Test Metrics: Precision=0.9047, Recall=0.9047, F1=0.9047

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8496.68 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7410.74 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9628.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 10.92it/s, loss=0.337]


Epoch 1 Loss: 34.5486
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.16it/s, loss=0.188]


Epoch 2 Loss: 17.5662
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.18it/s, loss=0.173]


Epoch 3 Loss: 12.7743
Test Metrics: Precision=0.9154, Recall=0.9154, F1=0.9154

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 9125.09 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7670.00 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9098.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.72it/s, loss=0.363]


Epoch 1 Loss: 33.7774
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.57it/s, loss=0.376]


Epoch 2 Loss: 16.8971
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.87it/s, loss=0.436]


Epoch 3 Loss: 12.4112
Test Metrics: Precision=0.9096, Recall=0.9096, F1=0.9096

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 8811.12 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7925.89 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9121.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 10.93it/s, loss=0.429]


Epoch 1 Loss: 38.3290
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 10.95it/s, loss=0.479]


Epoch 2 Loss: 18.4932
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 10.98it/s, loss=0.296]


Epoch 3 Loss: 13.7167
Test Metrics: Precision=0.9074, Recall=0.9074, F1=0.9074

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 425, Split 1...


Map: 100%|██████████| 425/425 [00:00<00:00, 8826.31 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 8095.46 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9392.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.70it/s, loss=0.161]


Epoch 1 Loss: 39.4719
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.47it/s, loss=0.251]


Epoch 2 Loss: 19.2745
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.71it/s, loss=0.368]


Epoch 3 Loss: 13.7530
Test Metrics: Precision=0.9065, Recall=0.9065, F1=0.9065

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 425, Split 2...


Map: 100%|██████████| 425/425 [00:00<00:00, 9269.54 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 7174.66 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9369.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 12.02it/s, loss=0.512]


Epoch 1 Loss: 35.7046
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.58it/s, loss=0.214]


Epoch 2 Loss: 18.7980
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.50it/s, loss=0.146]


Epoch 3 Loss: 14.1172
Test Metrics: Precision=0.9030, Recall=0.9030, F1=0.9030

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 425, Split 3...


Map: 100%|██████████| 425/425 [00:00<00:00, 9075.81 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 8053.40 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9573.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.12it/s, loss=0.265]


Epoch 1 Loss: 34.5953
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.43it/s, loss=0.954]


Epoch 2 Loss: 17.9011
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 10.89it/s, loss=0.156]


Epoch 3 Loss: 13.6082
Test Metrics: Precision=0.9085, Recall=0.9085, F1=0.9085

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 425, Split 4...


Map: 100%|██████████| 425/425 [00:00<00:00, 9506.68 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 7114.10 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9033.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.48it/s, loss=0.644]


Epoch 1 Loss: 34.4679
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 12.22it/s, loss=0.227]


Epoch 2 Loss: 16.9914
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.99it/s, loss=0.403]


Epoch 3 Loss: 12.3637
Test Metrics: Precision=0.9052, Recall=0.9052, F1=0.9052

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 425, Split 5...


Map: 100%|██████████| 425/425 [00:00<00:00, 9005.93 examples/s]
Map: 100%|██████████| 85/85 [00:00<00:00, 7200.74 examples/s]
Map: 100%|██████████| 738/738 [00:00<00:00, 9480.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.31it/s, loss=0.117]


Epoch 1 Loss: 34.9017
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 10.92it/s, loss=0.0629]


Epoch 2 Loss: 17.3775
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.01it/s, loss=0.513]


Epoch 3 Loss: 13.4250
Test Metrics: Precision=0.9121, Recall=0.9121, F1=0.9121

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 430, Split 1...


Map: 100%|██████████| 430/430 [00:00<00:00, 9048.20 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7778.96 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9428.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.63it/s, loss=0.456]


Epoch 1 Loss: 34.9647
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.70it/s, loss=0.294]


Epoch 2 Loss: 16.8621
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.61it/s, loss=0.312]


Epoch 3 Loss: 11.9925
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 430, Split 2...


Map: 100%|██████████| 430/430 [00:00<00:00, 9396.67 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7650.11 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9233.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.32it/s, loss=0.449]


Epoch 1 Loss: 36.5395
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.36it/s, loss=0.381]


Epoch 2 Loss: 18.1429
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.76it/s, loss=0.194]


Epoch 3 Loss: 13.7534
Test Metrics: Precision=0.9046, Recall=0.9046, F1=0.9046

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 430, Split 3...


Map: 100%|██████████| 430/430 [00:00<00:00, 8535.46 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 8880.11 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9310.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.23it/s, loss=0.252]


Epoch 1 Loss: 36.2208
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.11it/s, loss=0.345]


Epoch 2 Loss: 17.5912
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.10it/s, loss=0.22] 


Epoch 3 Loss: 13.0358
Test Metrics: Precision=0.9107, Recall=0.9107, F1=0.9107

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 430, Split 4...


Map: 100%|██████████| 430/430 [00:00<00:00, 9136.90 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 8077.71 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 8931.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 11.57it/s, loss=0.305]


Epoch 1 Loss: 35.2321
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 11.77it/s, loss=0.182]


Epoch 2 Loss: 16.3963
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.72it/s, loss=0.303] 


Epoch 3 Loss: 12.8960
Test Metrics: Precision=0.9106, Recall=0.9106, F1=0.9106

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 430, Split 5...


Map: 100%|██████████| 430/430 [00:00<00:00, 9061.38 examples/s]
Map: 100%|██████████| 86/86 [00:00<00:00, 7615.86 examples/s]
Map: 100%|██████████| 732/732 [00:00<00:00, 9347.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 54/54 [00:04<00:00, 10.96it/s, loss=0.575]


Epoch 1 Loss: 37.0171
Epoch 2/3


Training Epoch 2: 100%|██████████| 54/54 [00:04<00:00, 10.88it/s, loss=0.316]


Epoch 2 Loss: 19.2227
Epoch 3/3


Training Epoch 3: 100%|██████████| 54/54 [00:04<00:00, 11.11it/s, loss=0.21] 


Epoch 3 Loss: 13.7103
Test Metrics: Precision=0.9075, Recall=0.9075, F1=0.9075

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 435, Split 1...


Map: 100%|██████████| 435/435 [00:00<00:00, 8925.06 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 6696.35 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9595.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.34it/s, loss=0.18] 


Epoch 1 Loss: 34.1267
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.55it/s, loss=0.259]


Epoch 2 Loss: 16.6309
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.95it/s, loss=0.315]


Epoch 3 Loss: 12.2191
Test Metrics: Precision=0.9139, Recall=0.9139, F1=0.9139

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 435, Split 2...


Map: 100%|██████████| 435/435 [00:00<00:00, 8741.02 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 8190.71 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 8962.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.50it/s, loss=0.263]


Epoch 1 Loss: 35.4435
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.58it/s, loss=0.128]


Epoch 2 Loss: 17.6068
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.70it/s, loss=0.307]


Epoch 3 Loss: 12.9808
Test Metrics: Precision=0.9079, Recall=0.9079, F1=0.9079

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 435, Split 3...


Map: 100%|██████████| 435/435 [00:00<00:00, 8381.63 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7274.37 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9445.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:05<00:00, 10.93it/s, loss=0.398]


Epoch 1 Loss: 34.9661
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:05<00:00, 10.82it/s, loss=0.347]


Epoch 2 Loss: 18.0656
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.13it/s, loss=0.531]


Epoch 3 Loss: 13.5913
Test Metrics: Precision=0.9141, Recall=0.9141, F1=0.9141

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 435, Split 4...


Map: 100%|██████████| 435/435 [00:00<00:00, 9280.33 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 6162.37 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9301.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.69it/s, loss=0.312]


Epoch 1 Loss: 33.5538
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.66it/s, loss=0.233]


Epoch 2 Loss: 16.1784
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.98it/s, loss=0.155]


Epoch 3 Loss: 11.7309
Test Metrics: Precision=0.9116, Recall=0.9116, F1=0.9116

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 435, Split 5...


Map: 100%|██████████| 435/435 [00:00<00:00, 8900.46 examples/s]
Map: 100%|██████████| 87/87 [00:00<00:00, 7127.88 examples/s]
Map: 100%|██████████| 726/726 [00:00<00:00, 9338.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:05<00:00, 10.59it/s, loss=0.545]


Epoch 1 Loss: 34.7913
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:05<00:00, 10.91it/s, loss=0.359]


Epoch 2 Loss: 19.8771
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.11it/s, loss=0.233]


Epoch 3 Loss: 14.2055
Test Metrics: Precision=0.9087, Recall=0.9087, F1=0.9087

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 8619.77 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7185.80 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9338.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.52it/s, loss=0.422]


Epoch 1 Loss: 33.5244
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.64it/s, loss=0.194]


Epoch 2 Loss: 17.1122
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.55it/s, loss=0.298]


Epoch 3 Loss: 12.5985
Test Metrics: Precision=0.9105, Recall=0.9105, F1=0.9105

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 8993.46 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7732.89 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9226.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.49it/s, loss=0.546]


Epoch 1 Loss: 34.9449
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.51it/s, loss=0.306]


Epoch 2 Loss: 16.9396
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.68it/s, loss=0.193]


Epoch 3 Loss: 12.3839
Test Metrics: Precision=0.9102, Recall=0.9102, F1=0.9102

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8255.17 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7950.77 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9387.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:05<00:00, 10.94it/s, loss=0.432]


Epoch 1 Loss: 33.6043
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:05<00:00, 10.73it/s, loss=0.134]


Epoch 2 Loss: 17.1261
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:05<00:00, 10.36it/s, loss=0.124] 


Epoch 3 Loss: 12.4025
Test Metrics: Precision=0.9130, Recall=0.9130, F1=0.9130

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 9315.31 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 6484.63 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9207.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.50it/s, loss=0.263]


Epoch 1 Loss: 33.8513
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.53it/s, loss=0.262]


Epoch 2 Loss: 16.7651
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.54it/s, loss=0.356] 


Epoch 3 Loss: 12.1756
Test Metrics: Precision=0.9143, Recall=0.9143, F1=0.9143

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 9014.90 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7208.96 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 2655.08 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.18it/s, loss=0.439]


Epoch 1 Loss: 33.1898
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.17it/s, loss=0.138]


Epoch 2 Loss: 17.5030
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:05<00:00, 10.94it/s, loss=0.149]


Epoch 3 Loss: 12.3801
Test Metrics: Precision=0.9134, Recall=0.9134, F1=0.9134

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 445, Split 1...


Map: 100%|██████████| 445/445 [00:00<00:00, 8635.21 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 8449.18 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9292.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 56/56 [00:04<00:00, 11.58it/s, loss=0.257]


Epoch 1 Loss: 37.3341
Epoch 2/3


Training Epoch 2: 100%|██████████| 56/56 [00:04<00:00, 11.66it/s, loss=0.232]


Epoch 2 Loss: 17.9483
Epoch 3/3


Training Epoch 3: 100%|██████████| 56/56 [00:04<00:00, 11.80it/s, loss=0.193]


Epoch 3 Loss: 13.1143
Test Metrics: Precision=0.9119, Recall=0.9119, F1=0.9119

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 445, Split 2...


Map: 100%|██████████| 445/445 [00:00<00:00, 8924.44 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 8175.31 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9078.75 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 56/56 [00:04<00:00, 11.49it/s, loss=0.256]


Epoch 1 Loss: 34.8089
Epoch 2/3


Training Epoch 2: 100%|██████████| 56/56 [00:04<00:00, 11.89it/s, loss=0.13] 


Epoch 2 Loss: 17.1600
Epoch 3/3


Training Epoch 3: 100%|██████████| 56/56 [00:04<00:00, 11.43it/s, loss=0.207] 


Epoch 3 Loss: 12.5153
Test Metrics: Precision=0.9089, Recall=0.9089, F1=0.9089

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 445, Split 3...


Map: 100%|██████████| 445/445 [00:00<00:00, 8414.32 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7331.69 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9576.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 56/56 [00:05<00:00, 10.78it/s, loss=0.419]


Epoch 1 Loss: 36.3138
Epoch 2/3


Training Epoch 2: 100%|██████████| 56/56 [00:05<00:00, 11.09it/s, loss=0.224]


Epoch 2 Loss: 18.3296
Epoch 3/3


Training Epoch 3: 100%|██████████| 56/56 [00:05<00:00, 11.06it/s, loss=0.205]


Epoch 3 Loss: 13.1407
Test Metrics: Precision=0.9135, Recall=0.9135, F1=0.9135

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 445, Split 4...


Map: 100%|██████████| 445/445 [00:00<00:00, 9420.36 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7293.59 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 8928.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 56/56 [00:04<00:00, 11.87it/s, loss=0.302]


Epoch 1 Loss: 36.2313
Epoch 2/3


Training Epoch 2: 100%|██████████| 56/56 [00:04<00:00, 11.68it/s, loss=0.569]


Epoch 2 Loss: 17.9234
Epoch 3/3


Training Epoch 3: 100%|██████████| 56/56 [00:04<00:00, 12.11it/s, loss=0.214] 


Epoch 3 Loss: 13.2334
Test Metrics: Precision=0.9098, Recall=0.9098, F1=0.9098

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 445, Split 5...


Map: 100%|██████████| 445/445 [00:00<00:00, 8635.89 examples/s]
Map: 100%|██████████| 89/89 [00:00<00:00, 7519.40 examples/s]
Map: 100%|██████████| 714/714 [00:00<00:00, 9166.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 56/56 [00:05<00:00, 10.91it/s, loss=0.364]


Epoch 1 Loss: 36.2782
Epoch 2/3


Training Epoch 2: 100%|██████████| 56/56 [00:04<00:00, 11.21it/s, loss=0.336]


Epoch 2 Loss: 19.1918
Epoch 3/3


Training Epoch 3: 100%|██████████| 56/56 [00:05<00:00, 10.84it/s, loss=0.211]


Epoch 3 Loss: 13.7186
Test Metrics: Precision=0.9090, Recall=0.9090, F1=0.9090

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 450, Split 1...


Map: 100%|██████████| 450/450 [00:00<00:00, 8062.56 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 8183.48 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9066.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.62it/s, loss=0.38] 


Epoch 1 Loss: 36.3974
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:04<00:00, 11.64it/s, loss=0.293]


Epoch 2 Loss: 17.6398
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:04<00:00, 11.71it/s, loss=0.599] 


Epoch 3 Loss: 13.4169
Test Metrics: Precision=0.9103, Recall=0.9103, F1=0.9103

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 450, Split 2...


Map: 100%|██████████| 450/450 [00:00<00:00, 8969.98 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7285.57 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 8965.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.42it/s, loss=0.491]


Epoch 1 Loss: 35.8959
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:04<00:00, 11.81it/s, loss=0.149]


Epoch 2 Loss: 18.0359
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:04<00:00, 11.55it/s, loss=0.365]


Epoch 3 Loss: 13.6197
Test Metrics: Precision=0.9100, Recall=0.9100, F1=0.9100

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 450, Split 3...


Map: 100%|██████████| 450/450 [00:00<00:00, 8641.08 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7530.47 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9544.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:05<00:00, 10.83it/s, loss=0.271]


Epoch 1 Loss: 39.1551
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:05<00:00, 11.01it/s, loss=0.321]


Epoch 2 Loss: 20.3275
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:05<00:00, 10.81it/s, loss=0.226]


Epoch 3 Loss: 14.7126
Test Metrics: Precision=0.9092, Recall=0.9092, F1=0.9092

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 450, Split 4...


Map: 100%|██████████| 450/450 [00:00<00:00, 9578.08 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 7773.79 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9000.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.85it/s, loss=0.179]


Epoch 1 Loss: 35.6431
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:04<00:00, 11.70it/s, loss=0.187]


Epoch 2 Loss: 17.0824
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:04<00:00, 11.62it/s, loss=0.283] 


Epoch 3 Loss: 13.0312
Test Metrics: Precision=0.9133, Recall=0.9133, F1=0.9133

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 450, Split 5...


Map: 100%|██████████| 450/450 [00:00<00:00, 9023.59 examples/s]
Map: 100%|██████████| 90/90 [00:00<00:00, 6543.83 examples/s]
Map: 100%|██████████| 708/708 [00:00<00:00, 9517.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.62it/s, loss=0.126]


Epoch 1 Loss: 34.6913
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:05<00:00, 11.15it/s, loss=0.31] 


Epoch 2 Loss: 17.4440
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:05<00:00, 11.19it/s, loss=0.0974]


Epoch 3 Loss: 12.7348
Test Metrics: Precision=0.9116, Recall=0.9116, F1=0.9116

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 455, Split 1...


Map: 100%|██████████| 455/455 [00:00<00:00, 8878.67 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7638.22 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9285.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.73it/s, loss=0.427]


Epoch 1 Loss: 39.3067
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:04<00:00, 11.72it/s, loss=0.278]


Epoch 2 Loss: 18.0336
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:04<00:00, 11.59it/s, loss=0.164]


Epoch 3 Loss: 13.0478
Test Metrics: Precision=0.9106, Recall=0.9106, F1=0.9106

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 455, Split 2...


Map: 100%|██████████| 455/455 [00:00<00:00, 9435.89 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 8104.51 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9104.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.78it/s, loss=0.335]


Epoch 1 Loss: 35.6111
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:04<00:00, 11.43it/s, loss=0.123]


Epoch 2 Loss: 18.2208
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:04<00:00, 11.44it/s, loss=0.374]


Epoch 3 Loss: 13.2342
Test Metrics: Precision=0.9091, Recall=0.9091, F1=0.9091

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 455, Split 3...


Map: 100%|██████████| 455/455 [00:00<00:00, 8372.97 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7766.76 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9443.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:05<00:00, 10.43it/s, loss=0.387]


Epoch 1 Loss: 38.8227
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:05<00:00, 10.92it/s, loss=0.199]


Epoch 2 Loss: 20.0315
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:05<00:00, 11.19it/s, loss=0.269]


Epoch 3 Loss: 14.2459
Test Metrics: Precision=0.9106, Recall=0.9106, F1=0.9106

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 455, Split 4...


Map: 100%|██████████| 455/455 [00:00<00:00, 9159.76 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7145.72 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 8908.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:04<00:00, 11.44it/s, loss=0.394]


Epoch 1 Loss: 35.1995
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:04<00:00, 11.53it/s, loss=0.584]


Epoch 2 Loss: 17.3767
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:05<00:00, 11.30it/s, loss=0.278] 


Epoch 3 Loss: 12.5390
Test Metrics: Precision=0.9126, Recall=0.9126, F1=0.9126

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 455, Split 5...


Map: 100%|██████████| 455/455 [00:00<00:00, 8582.75 examples/s]
Map: 100%|██████████| 91/91 [00:00<00:00, 7875.41 examples/s]
Map: 100%|██████████| 702/702 [00:00<00:00, 9375.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 57/57 [00:05<00:00, 10.80it/s, loss=0.513]


Epoch 1 Loss: 36.7198
Epoch 2/3


Training Epoch 2: 100%|██████████| 57/57 [00:05<00:00, 10.84it/s, loss=0.25] 


Epoch 2 Loss: 17.5091
Epoch 3/3


Training Epoch 3: 100%|██████████| 57/57 [00:05<00:00, 11.18it/s, loss=0.2]   


Epoch 3 Loss: 12.7292
Test Metrics: Precision=0.9124, Recall=0.9124, F1=0.9124

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 8704.00 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8164.96 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9152.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 11.37it/s, loss=0.729]


Epoch 1 Loss: 34.8566
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:04<00:00, 11.76it/s, loss=0.149]


Epoch 2 Loss: 17.0280
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 11.56it/s, loss=0.187]


Epoch 3 Loss: 12.4195
Test Metrics: Precision=0.9143, Recall=0.9143, F1=0.9143

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 9186.30 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7656.88 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9158.47 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:04<00:00, 11.67it/s, loss=0.302]


Epoch 1 Loss: 36.1110
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:04<00:00, 11.62it/s, loss=0.476]


Epoch 2 Loss: 18.0229
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:04<00:00, 11.74it/s, loss=0.138] 


Epoch 3 Loss: 12.9203
Test Metrics: Precision=0.9095, Recall=0.9095, F1=0.9095

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8352.47 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8170.32 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9530.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 10.84it/s, loss=0.367]


Epoch 1 Loss: 37.9000
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 10.73it/s, loss=0.427]


Epoch 2 Loss: 18.4058
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 10.90it/s, loss=0.246] 


Epoch 3 Loss: 13.4279
Test Metrics: Precision=0.9130, Recall=0.9130, F1=0.9130

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 9169.53 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7009.81 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9253.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:04<00:00, 11.62it/s, loss=0.328]


Epoch 1 Loss: 35.2628
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 11.51it/s, loss=0.357]


Epoch 2 Loss: 17.7712
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:04<00:00, 11.75it/s, loss=0.22] 


Epoch 3 Loss: 12.9524
Test Metrics: Precision=0.9115, Recall=0.9115, F1=0.9115

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 8634.81 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8704.62 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9053.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 10.61it/s, loss=0.259]


Epoch 1 Loss: 37.5737
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 11.05it/s, loss=0.49] 


Epoch 2 Loss: 19.8464
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 10.81it/s, loss=0.248] 


Epoch 3 Loss: 14.0642
Test Metrics: Precision=0.9087, Recall=0.9087, F1=0.9087

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 465, Split 1...


Map: 100%|██████████| 465/465 [00:00<00:00, 8711.20 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 8382.84 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9149.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.63it/s, loss=0.402]


Epoch 1 Loss: 36.1193
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 11.55it/s, loss=0.371]


Epoch 2 Loss: 18.0239
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.36it/s, loss=0.156] 


Epoch 3 Loss: 13.0931
Test Metrics: Precision=0.9125, Recall=0.9125, F1=0.9125

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 465, Split 2...


Map: 100%|██████████| 465/465 [00:00<00:00, 9143.66 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 6929.41 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9271.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.44it/s, loss=0.226]


Epoch 1 Loss: 38.6089
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 11.35it/s, loss=0.0212]


Epoch 2 Loss: 18.5887
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:04<00:00, 11.89it/s, loss=0.131]


Epoch 3 Loss: 14.1602
Test Metrics: Precision=0.9106, Recall=0.9106, F1=0.9106

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 465, Split 3...


Map: 100%|██████████| 465/465 [00:00<00:00, 8813.00 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 8279.99 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9547.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.09it/s, loss=0.542]


Epoch 1 Loss: 37.1613
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 10.99it/s, loss=0.273]


Epoch 2 Loss: 18.4898
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.05it/s, loss=0.3]  


Epoch 3 Loss: 13.5176
Test Metrics: Precision=0.9118, Recall=0.9118, F1=0.9118

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 465, Split 4...


Map: 100%|██████████| 465/465 [00:00<00:00, 9357.25 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7037.17 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9147.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.13it/s, loss=0.329]


Epoch 1 Loss: 34.2329
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 11.36it/s, loss=0.33] 


Epoch 2 Loss: 17.3160
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.36it/s, loss=0.151] 


Epoch 3 Loss: 12.9072
Test Metrics: Precision=0.9104, Recall=0.9104, F1=0.9104

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 465, Split 5...


Map: 100%|██████████| 465/465 [00:00<00:00, 8479.24 examples/s]
Map: 100%|██████████| 93/93 [00:00<00:00, 7391.19 examples/s]
Map: 100%|██████████| 690/690 [00:00<00:00, 9280.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 10.68it/s, loss=0.605]


Epoch 1 Loss: 36.9834
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 10.95it/s, loss=0.238]


Epoch 2 Loss: 17.8189
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.11it/s, loss=0.0226]


Epoch 3 Loss: 12.6790
Test Metrics: Precision=0.9155, Recall=0.9155, F1=0.9155

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 470, Split 1...


Map: 100%|██████████| 470/470 [00:00<00:00, 9023.72 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7468.26 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9360.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.36it/s, loss=0.534]


Epoch 1 Loss: 34.7832
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 11.53it/s, loss=0.18]  


Epoch 2 Loss: 17.7124
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.47it/s, loss=0.24] 


Epoch 3 Loss: 12.9758
Test Metrics: Precision=0.9138, Recall=0.9138, F1=0.9138

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 470, Split 2...


Map: 100%|██████████| 470/470 [00:00<00:00, 9127.26 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 8120.79 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 8871.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.47it/s, loss=0.371]


Epoch 1 Loss: 34.8369
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 11.75it/s, loss=0.249]


Epoch 2 Loss: 17.6978
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.51it/s, loss=0.225]


Epoch 3 Loss: 12.9371
Test Metrics: Precision=0.9114, Recall=0.9114, F1=0.9114

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 470, Split 3...


Map: 100%|██████████| 470/470 [00:00<00:00, 8613.40 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7094.28 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9693.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 10.56it/s, loss=0.722]


Epoch 1 Loss: 35.9460
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 10.60it/s, loss=0.574]


Epoch 2 Loss: 18.8112
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 10.91it/s, loss=0.18] 


Epoch 3 Loss: 13.1015
Test Metrics: Precision=0.9155, Recall=0.9155, F1=0.9155

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 470, Split 4...


Map: 100%|██████████| 470/470 [00:00<00:00, 8854.11 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 6407.89 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9337.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 11.27it/s, loss=0.401]


Epoch 1 Loss: 37.8637
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 11.38it/s, loss=0.778]


Epoch 2 Loss: 19.0881
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 11.56it/s, loss=0.205]


Epoch 3 Loss: 14.2482
Test Metrics: Precision=0.9071, Recall=0.9071, F1=0.9071

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 470, Split 5...


Map: 100%|██████████| 470/470 [00:00<00:00, 8896.06 examples/s]
Map: 100%|██████████| 94/94 [00:00<00:00, 7161.55 examples/s]
Map: 100%|██████████| 684/684 [00:00<00:00, 9447.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 59/59 [00:05<00:00, 10.62it/s, loss=0.426]


Epoch 1 Loss: 38.3187
Epoch 2/3


Training Epoch 2: 100%|██████████| 59/59 [00:05<00:00, 10.92it/s, loss=0.632]


Epoch 2 Loss: 19.7455
Epoch 3/3


Training Epoch 3: 100%|██████████| 59/59 [00:05<00:00, 10.90it/s, loss=0.157]


Epoch 3 Loss: 14.1295
Test Metrics: Precision=0.9095, Recall=0.9095, F1=0.9095

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 475, Split 1...


Map: 100%|██████████| 475/475 [00:00<00:00, 9168.78 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7149.55 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9298.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.72it/s, loss=0.691]


Epoch 1 Loss: 36.5676
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.82it/s, loss=0.3]  


Epoch 2 Loss: 17.8513
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.88it/s, loss=0.145] 


Epoch 3 Loss: 12.8164
Test Metrics: Precision=0.9115, Recall=0.9115, F1=0.9115

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 475, Split 2...


Map: 100%|██████████| 475/475 [00:00<00:00, 8826.55 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 6656.29 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9248.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.83it/s, loss=0.166]


Epoch 1 Loss: 37.0322
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.97it/s, loss=0.246]


Epoch 2 Loss: 17.5604
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.54it/s, loss=0.268]


Epoch 3 Loss: 12.8195
Test Metrics: Precision=0.9141, Recall=0.9141, F1=0.9141

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 475, Split 3...


Map: 100%|██████████| 475/475 [00:00<00:00, 8935.54 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 8361.85 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 9544.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.04it/s, loss=0.197]


Epoch 1 Loss: 36.5292
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 10.87it/s, loss=0.299]


Epoch 2 Loss: 18.5175
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 10.91it/s, loss=0.236] 


Epoch 3 Loss: 13.2107
Test Metrics: Precision=0.9160, Recall=0.9160, F1=0.9160

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 475, Split 4...


Map: 100%|██████████| 475/475 [00:00<00:00, 9006.88 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 7757.40 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 8949.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.44it/s, loss=0.34] 


Epoch 1 Loss: 35.9248
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.67it/s, loss=0.278]


Epoch 2 Loss: 18.0018
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.76it/s, loss=0.207] 


Epoch 3 Loss: 13.4316
Test Metrics: Precision=0.9137, Recall=0.9137, F1=0.9137

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 475, Split 5...


Map: 100%|██████████| 475/475 [00:00<00:00, 8826.01 examples/s]
Map: 100%|██████████| 95/95 [00:00<00:00, 8201.44 examples/s]
Map: 100%|██████████| 678/678 [00:00<00:00, 8992.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.03it/s, loss=0.594]


Epoch 1 Loss: 37.0853
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 10.95it/s, loss=0.26] 


Epoch 2 Loss: 19.5352
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.09it/s, loss=0.165]


Epoch 3 Loss: 13.5577
Test Metrics: Precision=0.9139, Recall=0.9139, F1=0.9139

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 9008.06 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8154.51 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 8991.78 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.42it/s, loss=0.479]


Epoch 1 Loss: 37.5634
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.43it/s, loss=0.242]


Epoch 2 Loss: 17.5913
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.50it/s, loss=0.143] 


Epoch 3 Loss: 12.8252
Test Metrics: Precision=0.9116, Recall=0.9116, F1=0.9116

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 9038.19 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7562.27 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9048.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.90it/s, loss=0.345]


Epoch 1 Loss: 36.2157
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.46it/s, loss=0.3]  


Epoch 2 Loss: 17.1061
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.94it/s, loss=0.137]


Epoch 3 Loss: 12.5044
Test Metrics: Precision=0.9126, Recall=0.9126, F1=0.9126

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8731.16 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7694.21 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9543.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 10.75it/s, loss=0.393]


Epoch 1 Loss: 37.4474
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 10.68it/s, loss=0.261]


Epoch 2 Loss: 19.5103
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 10.68it/s, loss=0.209]


Epoch 3 Loss: 14.1861
Test Metrics: Precision=0.9128, Recall=0.9128, F1=0.9128

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 9474.15 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8511.13 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9074.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.16it/s, loss=0.209]


Epoch 1 Loss: 38.1007
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.48it/s, loss=0.22] 


Epoch 2 Loss: 18.5064
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.42it/s, loss=0.269] 


Epoch 3 Loss: 13.1949
Test Metrics: Precision=0.9141, Recall=0.9141, F1=0.9141

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8988.80 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8610.32 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9266.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 10.95it/s, loss=0.377]


Epoch 1 Loss: 37.5935
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.11it/s, loss=0.281]


Epoch 2 Loss: 19.3154
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 10.95it/s, loss=0.218]


Epoch 3 Loss: 13.6039
Test Metrics: Precision=0.9145, Recall=0.9145, F1=0.9145

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 485, Split 1...


Map: 100%|██████████| 485/485 [00:00<00:00, 8358.48 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 6793.81 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9229.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 61/61 [00:05<00:00, 11.54it/s, loss=0.439]


Epoch 1 Loss: 36.9206
Epoch 2/3


Training Epoch 2: 100%|██████████| 61/61 [00:05<00:00, 11.56it/s, loss=0.26] 


Epoch 2 Loss: 18.0760
Epoch 3/3


Training Epoch 3: 100%|██████████| 61/61 [00:05<00:00, 11.51it/s, loss=0.129] 


Epoch 3 Loss: 12.6455
Test Metrics: Precision=0.9117, Recall=0.9117, F1=0.9117

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 485, Split 2...


Map: 100%|██████████| 485/485 [00:00<00:00, 8490.32 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 6924.71 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9407.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 61/61 [00:05<00:00, 11.53it/s, loss=0.83] 


Epoch 1 Loss: 37.7559
Epoch 2/3


Training Epoch 2: 100%|██████████| 61/61 [00:05<00:00, 12.03it/s, loss=0.357]


Epoch 2 Loss: 18.7814
Epoch 3/3


Training Epoch 3: 100%|██████████| 61/61 [00:05<00:00, 11.37it/s, loss=0.18]  


Epoch 3 Loss: 13.3232
Test Metrics: Precision=0.9120, Recall=0.9120, F1=0.9120

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 485, Split 3...


Map: 100%|██████████| 485/485 [00:00<00:00, 8564.24 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 8157.34 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9564.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 61/61 [00:05<00:00, 10.98it/s, loss=0.47] 


Epoch 1 Loss: 38.6130
Epoch 2/3


Training Epoch 2: 100%|██████████| 61/61 [00:05<00:00, 10.93it/s, loss=0.633]


Epoch 2 Loss: 20.1556
Epoch 3/3


Training Epoch 3: 100%|██████████| 61/61 [00:05<00:00, 10.89it/s, loss=0.238]


Epoch 3 Loss: 14.5375
Test Metrics: Precision=0.9164, Recall=0.9164, F1=0.9164

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 485, Split 4...


Map: 100%|██████████| 485/485 [00:00<00:00, 9319.48 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 7749.92 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9057.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 61/61 [00:05<00:00, 11.36it/s, loss=0.359]


Epoch 1 Loss: 37.8040
Epoch 2/3


Training Epoch 2: 100%|██████████| 61/61 [00:05<00:00, 11.32it/s, loss=0.249]


Epoch 2 Loss: 18.1956
Epoch 3/3


Training Epoch 3: 100%|██████████| 61/61 [00:05<00:00, 11.48it/s, loss=0.116] 


Epoch 3 Loss: 12.8997
Test Metrics: Precision=0.9092, Recall=0.9092, F1=0.9092

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 485, Split 5...


Map: 100%|██████████| 485/485 [00:00<00:00, 8808.43 examples/s]
Map: 100%|██████████| 97/97 [00:00<00:00, 9290.88 examples/s]
Map: 100%|██████████| 666/666 [00:00<00:00, 9022.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 61/61 [00:05<00:00, 11.08it/s, loss=0.206]


Epoch 1 Loss: 37.3825
Epoch 2/3


Training Epoch 2: 100%|██████████| 61/61 [00:05<00:00, 11.18it/s, loss=0.346]


Epoch 2 Loss: 19.2446
Epoch 3/3


Training Epoch 3: 100%|██████████| 61/61 [00:05<00:00, 10.92it/s, loss=0.122]


Epoch 3 Loss: 14.0515
Test Metrics: Precision=0.9138, Recall=0.9138, F1=0.9138

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 490, Split 1...


Map: 100%|██████████| 490/490 [00:00<00:00, 8679.93 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8223.63 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9348.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.37it/s, loss=0.353]


Epoch 1 Loss: 37.9943
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.33it/s, loss=0.256]


Epoch 2 Loss: 18.1822
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.87it/s, loss=0.219] 


Epoch 3 Loss: 12.6107
Test Metrics: Precision=0.9108, Recall=0.9108, F1=0.9108

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 490, Split 2...


Map: 100%|██████████| 490/490 [00:00<00:00, 9123.31 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 8209.83 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9174.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.55it/s, loss=0.294]


Epoch 1 Loss: 37.4280
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.71it/s, loss=0.1]  


Epoch 2 Loss: 18.7603
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.65it/s, loss=0.213] 


Epoch 3 Loss: 13.9174
Test Metrics: Precision=0.9137, Recall=0.9137, F1=0.9137

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 490, Split 3...


Map: 100%|██████████| 490/490 [00:00<00:00, 8588.20 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7697.99 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 8628.48 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.00it/s, loss=0.271]


Epoch 1 Loss: 40.2836
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.01it/s, loss=0.131]


Epoch 2 Loss: 20.0408
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 10.70it/s, loss=0.174]


Epoch 3 Loss: 14.8267
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 490, Split 4...


Map: 100%|██████████| 490/490 [00:00<00:00, 8593.77 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7717.21 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9327.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.38it/s, loss=1.16] 


Epoch 1 Loss: 42.2707
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.27it/s, loss=0.262]


Epoch 2 Loss: 22.4035
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.31it/s, loss=0.114]


Epoch 3 Loss: 16.3281
Test Metrics: Precision=0.9062, Recall=0.9062, F1=0.9062

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 490, Split 5...


Map: 100%|██████████| 490/490 [00:00<00:00, 8552.47 examples/s]
Map: 100%|██████████| 98/98 [00:00<00:00, 7279.33 examples/s]
Map: 100%|██████████| 660/660 [00:00<00:00, 9191.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.17it/s, loss=0.136]


Epoch 1 Loss: 37.5346
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.02it/s, loss=0.174]


Epoch 2 Loss: 17.9504
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.55it/s, loss=0.0926]


Epoch 3 Loss: 13.0276
Test Metrics: Precision=0.9152, Recall=0.9152, F1=0.9152

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 495, Split 1...


Map: 100%|██████████| 495/495 [00:00<00:00, 8745.64 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7354.65 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9380.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.41it/s, loss=0.239]


Epoch 1 Loss: 35.9526
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.53it/s, loss=0.172]


Epoch 2 Loss: 17.6365
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.08it/s, loss=0.189] 


Epoch 3 Loss: 12.5756
Test Metrics: Precision=0.9133, Recall=0.9133, F1=0.9133

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 495, Split 2...


Map: 100%|██████████| 495/495 [00:00<00:00, 8825.42 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 6950.61 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 8999.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.48it/s, loss=0.458]


Epoch 1 Loss: 36.8053
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.68it/s, loss=0.242]


Epoch 2 Loss: 18.4140
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.62it/s, loss=0.162]


Epoch 3 Loss: 13.4733
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 495, Split 3...


Map: 100%|██████████| 495/495 [00:00<00:00, 8940.61 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7709.83 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9741.17 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 10.90it/s, loss=0.557]


Epoch 1 Loss: 39.5105
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 10.82it/s, loss=0.382]


Epoch 2 Loss: 19.0407
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.13it/s, loss=0.124] 


Epoch 3 Loss: 13.7007
Test Metrics: Precision=0.9148, Recall=0.9148, F1=0.9148

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 495, Split 4...


Map: 100%|██████████| 495/495 [00:00<00:00, 9257.46 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 8009.03 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 9296.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 11.37it/s, loss=0.516]


Epoch 1 Loss: 39.5683
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.47it/s, loss=0.514]


Epoch 2 Loss: 20.7466
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.32it/s, loss=0.362] 


Epoch 3 Loss: 14.9349
Test Metrics: Precision=0.9113, Recall=0.9113, F1=0.9113

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 495, Split 5...


Map: 100%|██████████| 495/495 [00:00<00:00, 8384.85 examples/s]
Map: 100%|██████████| 99/99 [00:00<00:00, 7495.64 examples/s]
Map: 100%|██████████| 654/654 [00:00<00:00, 8953.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 62/62 [00:05<00:00, 10.58it/s, loss=0.508]


Epoch 1 Loss: 38.7908
Epoch 2/3


Training Epoch 2: 100%|██████████| 62/62 [00:05<00:00, 11.25it/s, loss=0.439]


Epoch 2 Loss: 18.7876
Epoch 3/3


Training Epoch 3: 100%|██████████| 62/62 [00:05<00:00, 11.05it/s, loss=0.265] 


Epoch 3 Loss: 13.5736
Test Metrics: Precision=0.9130, Recall=0.9130, F1=0.9130

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 8947.81 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8449.78 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9290.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.54it/s, loss=0.269]


Epoch 1 Loss: 37.3721
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.62it/s, loss=0.242]


Epoch 2 Loss: 18.2167
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.63it/s, loss=0.181] 


Epoch 3 Loss: 13.0934
Test Metrics: Precision=0.9161, Recall=0.9161, F1=0.9161

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 9255.80 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8454.21 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9112.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.52it/s, loss=0.391]


Epoch 1 Loss: 37.0956
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.44it/s, loss=0.207] 


Epoch 2 Loss: 18.2637
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.46it/s, loss=0.106] 


Epoch 3 Loss: 13.2440
Test Metrics: Precision=0.9130, Recall=0.9130, F1=0.9130

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 9023.70 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7605.54 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9808.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 10.97it/s, loss=0.371]


Epoch 1 Loss: 40.9411
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.03it/s, loss=0.365]


Epoch 2 Loss: 20.2494
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 10.94it/s, loss=0.191]


Epoch 3 Loss: 14.8024
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 9235.26 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7813.24 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9343.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.17it/s, loss=0.377]


Epoch 1 Loss: 41.0145
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.14it/s, loss=0.184]


Epoch 2 Loss: 19.5235
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.71it/s, loss=0.165] 


Epoch 3 Loss: 13.9827
Test Metrics: Precision=0.9116, Recall=0.9116, F1=0.9116

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 7725.17 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7056.84 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8446.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.39it/s, loss=0.389]


Epoch 1 Loss: 38.7714
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.34it/s, loss=0.269]


Epoch 2 Loss: 18.2915
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.15it/s, loss=0.108] 


Epoch 3 Loss: 13.4277
Test Metrics: Precision=0.9120, Recall=0.9120, F1=0.9120
Results saved to Experiments_full_labeled_biobert.xlsx


In [16]:
# Learning-curve sweep for BioBERT only: compared with the previous run this
# uses a coarser train-size step (20) and more splits per size (10) so the
# resulting curve is smoother.
# NOTE(review): the key "state" is echoed in the log lines next to the
# checkpoint name; its exact meaning is defined by the helper — confirm there.
models = dict(state="dmis-lab/biobert-v1.1")

# Train sizes start at 20 and grow by 20; each size is repeated for 10 splits.
# NOTE(review): whether end_size=500 is inclusive, and the exact layout of the
# spreadsheet written to `file_name`, depend on iterate_and_finetune_with_torch
# (defined in an earlier cell) — verify there.
iterate_and_finetune_with_torch(
    dataset=dataset,
    file_name="Experiments_moreksplits10_lesssteps20_for_smoother_graphh_biobert.xlsx",
    models=models,
    start_size=20,
    end_size=500,
    step_size=20,
    k_splits=10,
)


Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 1...


Map: 100%|██████████| 20/20 [00:00<00:00, 4317.79 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 2111.40 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 8995.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 13.92it/s, loss=1.61]


Epoch 1 Loss: 6.2660
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 15.15it/s, loss=0.515]


Epoch 2 Loss: 2.8233
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 15.98it/s, loss=0.88] 


Epoch 3 Loss: 2.6311


  results_df = pd.concat([results_df, new_row], ignore_index=True)


Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 2...


Map: 100%|██████████| 20/20 [00:00<00:00, 3509.44 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 2192.81 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 8609.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00,  9.86it/s, loss=1.68]


Epoch 1 Loss: 6.2333
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 10.04it/s, loss=0.801]


Epoch 2 Loss: 2.9198
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 10.97it/s, loss=0.945]


Epoch 3 Loss: 2.8540
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 3...


Map: 100%|██████████| 20/20 [00:00<00:00, 4755.45 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1729.97 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9029.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 14.32it/s, loss=1.49]


Epoch 1 Loss: 6.0605
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 13.93it/s, loss=1.01] 


Epoch 2 Loss: 3.0625
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 14.40it/s, loss=0.736]


Epoch 3 Loss: 2.6837
Test Metrics: Precision=0.7768, Recall=0.7768, F1=0.7768

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 4...


Map: 100%|██████████| 20/20 [00:00<00:00, 4325.81 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1558.64 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9274.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 11.54it/s, loss=1.63]


Epoch 1 Loss: 6.2633
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 12.51it/s, loss=0.753]


Epoch 2 Loss: 2.7978
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 11.77it/s, loss=0.747]


Epoch 3 Loss: 2.5552
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 5...


Map: 100%|██████████| 20/20 [00:00<00:00, 4837.16 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 2264.74 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9005.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 13.28it/s, loss=1.9] 


Epoch 1 Loss: 6.4804
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 14.50it/s, loss=1.23]


Epoch 2 Loss: 3.6500
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 13.18it/s, loss=0.927]


Epoch 3 Loss: 3.2075
Test Metrics: Precision=0.7784, Recall=0.7784, F1=0.7784

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 6...


Map: 100%|██████████| 20/20 [00:00<00:00, 3901.50 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 2013.59 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 8926.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00,  9.30it/s, loss=1.56]


Epoch 1 Loss: 5.5538
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 10.35it/s, loss=0.671]


Epoch 2 Loss: 2.9050
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00,  8.93it/s, loss=1.06]


Epoch 3 Loss: 3.0919
Test Metrics: Precision=0.7779, Recall=0.7779, F1=0.7779

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 7...


Map: 100%|██████████| 20/20 [00:00<00:00, 3943.87 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1724.81 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9329.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 14.21it/s, loss=1.56]


Epoch 1 Loss: 6.1494
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 16.76it/s, loss=1.18]


Epoch 2 Loss: 3.3641
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 13.29it/s, loss=1.26] 


Epoch 3 Loss: 2.8520
Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 8...


Map: 100%|██████████| 20/20 [00:00<00:00, 5216.80 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1985.00 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9178.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 19.00it/s, loss=1.7] 


Epoch 1 Loss: 6.2357
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 18.79it/s, loss=0.849]


Epoch 2 Loss: 3.1512
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 19.10it/s, loss=0.864]


Epoch 3 Loss: 2.7798
Test Metrics: Precision=0.7773, Recall=0.7773, F1=0.7773

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 9...


Map: 100%|██████████| 20/20 [00:00<00:00, 4497.43 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1624.28 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 9171.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 14.81it/s, loss=1.63]


Epoch 1 Loss: 6.3469
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 13.97it/s, loss=0.968]


Epoch 2 Loss: 2.8525
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 15.23it/s, loss=0.946]


Epoch 3 Loss: 2.6135
Test Metrics: Precision=0.7767, Recall=0.7767, F1=0.7767

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 20, Split 10...


Map: 100%|██████████| 20/20 [00:00<00:00, 3424.20 examples/s]
Map: 100%|██████████| 4/4 [00:00<00:00, 1520.36 examples/s]
Map: 100%|██████████| 1224/1224 [00:00<00:00, 8376.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 3/3 [00:00<00:00, 11.43it/s, loss=1.59]


Epoch 1 Loss: 6.0039
Epoch 2/3


Training Epoch 2: 100%|██████████| 3/3 [00:00<00:00, 12.73it/s, loss=0.835]


Epoch 2 Loss: 2.8644
Epoch 3/3


Training Epoch 3: 100%|██████████| 3/3 [00:00<00:00, 13.08it/s, loss=0.642]


Epoch 3 Loss: 2.4410
Test Metrics: Precision=0.7771, Recall=0.7771, F1=0.7771

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 1...


Map: 100%|██████████| 40/40 [00:00<00:00, 7126.81 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2511.18 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 8950.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 13.95it/s, loss=1.07]


Epoch 1 Loss: 8.4178
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 15.07it/s, loss=0.956]


Epoch 2 Loss: 4.6535
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 14.50it/s, loss=0.852]


Epoch 3 Loss: 3.9541
Test Metrics: Precision=0.7765, Recall=0.7765, F1=0.7765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 2...


Map: 100%|██████████| 40/40 [00:00<00:00, 4803.37 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2815.68 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 3756.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00,  8.98it/s, loss=1.33]


Epoch 1 Loss: 8.2950
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00,  9.14it/s, loss=0.636]


Epoch 2 Loss: 4.8020
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.37it/s, loss=1.04] 


Epoch 3 Loss: 4.4654
Test Metrics: Precision=0.7781, Recall=0.7781, F1=0.7781

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 3...


Map: 100%|██████████| 40/40 [00:00<00:00, 6938.18 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2178.29 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9298.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 13.31it/s, loss=0.999]


Epoch 1 Loss: 8.3347
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 12.57it/s, loss=0.927]


Epoch 2 Loss: 4.5997
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 14.68it/s, loss=0.982]


Epoch 3 Loss: 4.0257
Test Metrics: Precision=0.7767, Recall=0.7767, F1=0.7767

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 4...


Map: 100%|██████████| 40/40 [00:00<00:00, 5582.17 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2892.12 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9275.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00,  9.24it/s, loss=0.898]


Epoch 1 Loss: 8.0829
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 10.06it/s, loss=0.879]


Epoch 2 Loss: 4.3408
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.52it/s, loss=0.834]


Epoch 3 Loss: 3.9943
Test Metrics: Precision=0.7765, Recall=0.7765, F1=0.7765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 5...


Map: 100%|██████████| 40/40 [00:00<00:00, 6081.13 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2675.79 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9167.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 10.56it/s, loss=1.06]


Epoch 1 Loss: 8.4923
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 10.70it/s, loss=0.933]


Epoch 2 Loss: 4.6300
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 10.15it/s, loss=0.749]


Epoch 3 Loss: 4.1908
Test Metrics: Precision=0.7781, Recall=0.7781, F1=0.7781

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 6...


Map: 100%|██████████| 40/40 [00:00<00:00, 5553.16 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2985.27 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9013.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00,  9.68it/s, loss=1.2] 


Epoch 1 Loss: 8.4865
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00,  9.75it/s, loss=0.887]


Epoch 2 Loss: 4.8180
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.82it/s, loss=0.877]


Epoch 3 Loss: 4.2743
Test Metrics: Precision=0.7773, Recall=0.7773, F1=0.7773

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 7...


Map: 100%|██████████| 40/40 [00:00<00:00, 5934.85 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2267.19 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9155.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 12.61it/s, loss=0.793]


Epoch 1 Loss: 7.5662
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 12.20it/s, loss=1.24] 


Epoch 2 Loss: 4.4936
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 12.36it/s, loss=0.658]


Epoch 3 Loss: 3.6697
Test Metrics: Precision=0.7791, Recall=0.7791, F1=0.7791

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 8...


Map: 100%|██████████| 40/40 [00:00<00:00, 6983.81 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 3244.80 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9008.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 13.16it/s, loss=0.778]


Epoch 1 Loss: 7.3651
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 12.56it/s, loss=0.941]


Epoch 2 Loss: 4.5549
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 12.36it/s, loss=0.585]


Epoch 3 Loss: 3.9349
Test Metrics: Precision=0.7782, Recall=0.7782, F1=0.7782

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 9...


Map: 100%|██████████| 40/40 [00:00<00:00, 5351.07 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 2703.60 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9159.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 10.55it/s, loss=1.01]


Epoch 1 Loss: 8.5208
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 10.08it/s, loss=0.896]


Epoch 2 Loss: 4.7165
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00, 10.69it/s, loss=0.897]


Epoch 3 Loss: 4.4279
Test Metrics: Precision=0.7770, Recall=0.7770, F1=0.7770

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 40, Split 10...


Map: 100%|██████████| 40/40 [00:00<00:00, 5620.70 examples/s]
Map: 100%|██████████| 8/8 [00:00<00:00, 3039.35 examples/s]
Map: 100%|██████████| 1200/1200 [00:00<00:00, 9140.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 5/5 [00:00<00:00, 10.27it/s, loss=0.784]


Epoch 1 Loss: 7.6528
Epoch 2/3


Training Epoch 2: 100%|██████████| 5/5 [00:00<00:00, 11.01it/s, loss=0.993]


Epoch 2 Loss: 4.4047
Epoch 3/3


Training Epoch 3: 100%|██████████| 5/5 [00:00<00:00,  9.58it/s, loss=0.668]


Epoch 3 Loss: 4.0381
Test Metrics: Precision=0.7765, Recall=0.7765, F1=0.7765

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 1...


Map: 100%|██████████| 60/60 [00:00<00:00, 6812.44 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3607.74 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9481.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 11.54it/s, loss=0.39] 


Epoch 1 Loss: 10.2174
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 12.50it/s, loss=0.612]


Epoch 2 Loss: 5.9511
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 11.93it/s, loss=0.553]


Epoch 3 Loss: 4.8918
Test Metrics: Precision=0.7885, Recall=0.7885, F1=0.7885

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 2...


Map: 100%|██████████| 60/60 [00:00<00:00, 6773.56 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3404.47 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 8869.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 11.01it/s, loss=0.85] 


Epoch 1 Loss: 10.3882
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.07it/s, loss=0.549]


Epoch 2 Loss: 6.5492
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 11.45it/s, loss=0.886]


Epoch 3 Loss: 5.6827
Test Metrics: Precision=0.7915, Recall=0.7915, F1=0.7915

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 3...


Map: 100%|██████████| 60/60 [00:00<00:00, 7708.70 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3947.58 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9114.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 13.63it/s, loss=1.32] 


Epoch 1 Loss: 11.2147
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 14.25it/s, loss=0.806]


Epoch 2 Loss: 6.5533
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 13.92it/s, loss=0.466]


Epoch 3 Loss: 5.1940
Test Metrics: Precision=0.7800, Recall=0.7800, F1=0.7800

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 4...


Map: 100%|██████████| 60/60 [00:00<00:00, 6919.96 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3729.65 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9101.55 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 10.96it/s, loss=0.73] 


Epoch 1 Loss: 10.7896
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.84it/s, loss=0.689]


Epoch 2 Loss: 6.0210
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 10.38it/s, loss=0.808]


Epoch 3 Loss: 5.0762
Test Metrics: Precision=0.7799, Recall=0.7799, F1=0.7799

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 5...


Map: 100%|██████████| 60/60 [00:00<00:00, 7030.54 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3467.32 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9329.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 12.55it/s, loss=1.09] 


Epoch 1 Loss: 11.4637
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.93it/s, loss=0.719]


Epoch 2 Loss: 6.3710
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 12.63it/s, loss=0.623]


Epoch 3 Loss: 5.3732
Test Metrics: Precision=0.7884, Recall=0.7884, F1=0.7884

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 6...


Map: 100%|██████████| 60/60 [00:00<00:00, 6901.36 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 2555.81 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9149.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 11.25it/s, loss=0.951]


Epoch 1 Loss: 11.3625
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.47it/s, loss=0.54] 


Epoch 2 Loss: 6.5480
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 12.24it/s, loss=0.71] 


Epoch 3 Loss: 5.7120
Test Metrics: Precision=0.7798, Recall=0.7798, F1=0.7798

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 7...


Map: 100%|██████████| 60/60 [00:00<00:00, 6743.98 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3674.92 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9127.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 12.83it/s, loss=0.723]


Epoch 1 Loss: 9.7239
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 12.94it/s, loss=0.412]


Epoch 2 Loss: 5.7687
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 13.37it/s, loss=1.14] 


Epoch 3 Loss: 5.4240
Test Metrics: Precision=0.7851, Recall=0.7851, F1=0.7851

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 8...


Map: 100%|██████████| 60/60 [00:00<00:00, 7741.19 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 4008.57 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 8979.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 14.42it/s, loss=0.821]


Epoch 1 Loss: 10.2560
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 14.59it/s, loss=0.73] 


Epoch 2 Loss: 6.1241
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 14.96it/s, loss=0.603]


Epoch 3 Loss: 5.0814
Test Metrics: Precision=0.7887, Recall=0.7887, F1=0.7887

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 9...


Map: 100%|██████████| 60/60 [00:00<00:00, 6640.41 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 2871.83 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9469.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 12.10it/s, loss=0.967]


Epoch 1 Loss: 11.1061
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 11.80it/s, loss=0.783]


Epoch 2 Loss: 6.6224
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 11.51it/s, loss=0.669]


Epoch 3 Loss: 5.7981
Test Metrics: Precision=0.7832, Recall=0.7832, F1=0.7832

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 60, Split 10...


Map: 100%|██████████| 60/60 [00:00<00:00, 6514.58 examples/s]
Map: 100%|██████████| 12/12 [00:00<00:00, 3218.14 examples/s]
Map: 100%|██████████| 1176/1176 [00:00<00:00, 9403.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 8/8 [00:00<00:00, 10.22it/s, loss=0.926]


Epoch 1 Loss: 10.5646
Epoch 2/3


Training Epoch 2: 100%|██████████| 8/8 [00:00<00:00, 10.94it/s, loss=0.747]


Epoch 2 Loss: 5.8834
Epoch 3/3


Training Epoch 3: 100%|██████████| 8/8 [00:00<00:00, 10.81it/s, loss=0.549]


Epoch 3 Loss: 5.0628
Test Metrics: Precision=0.7757, Recall=0.7757, F1=0.7757

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 1...


Map: 100%|██████████| 80/80 [00:00<00:00, 7379.30 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4227.33 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9309.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.82it/s, loss=1]   


Epoch 1 Loss: 11.6967
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.21it/s, loss=0.497]


Epoch 2 Loss: 6.9540
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.97it/s, loss=0.459]


Epoch 3 Loss: 5.5737
Test Metrics: Precision=0.8291, Recall=0.8291, F1=0.8291

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 2...


Map: 100%|██████████| 80/80 [00:00<00:00, 7430.12 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4006.98 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9340.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.14it/s, loss=0.94]


Epoch 1 Loss: 13.2215
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 10.64it/s, loss=0.667]


Epoch 2 Loss: 8.2626
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 10.85it/s, loss=0.591]


Epoch 3 Loss: 6.8538
Test Metrics: Precision=0.8038, Recall=0.8038, F1=0.8038

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 3...


Map: 100%|██████████| 80/80 [00:00<00:00, 8343.76 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4167.22 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9147.67 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 13.32it/s, loss=0.894]


Epoch 1 Loss: 12.6714
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 12.78it/s, loss=0.674]


Epoch 2 Loss: 7.4457
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 13.71it/s, loss=0.495]


Epoch 3 Loss: 6.2051
Test Metrics: Precision=0.8281, Recall=0.8281, F1=0.8281

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 4...


Map: 100%|██████████| 80/80 [00:00<00:00, 7695.97 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3125.41 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 8983.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 10.25it/s, loss=0.854]


Epoch 1 Loss: 12.2253
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 10.89it/s, loss=0.51]


Epoch 2 Loss: 7.1902
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 10.73it/s, loss=0.707]


Epoch 3 Loss: 5.7717
Test Metrics: Precision=0.8242, Recall=0.8242, F1=0.8242

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 5...


Map: 100%|██████████| 80/80 [00:00<00:00, 6987.60 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3880.70 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9295.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.27it/s, loss=0.805]


Epoch 1 Loss: 12.0118
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 10.71it/s, loss=0.619]


Epoch 2 Loss: 7.4008
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.99it/s, loss=0.499]


Epoch 3 Loss: 6.2666
Test Metrics: Precision=0.7908, Recall=0.7908, F1=0.7908

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 6...


Map: 100%|██████████| 80/80 [00:00<00:00, 7592.19 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4050.02 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9280.02 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.58it/s, loss=0.685]


Epoch 1 Loss: 12.8190
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.07it/s, loss=0.748]


Epoch 2 Loss: 7.4268
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 10.73it/s, loss=0.523]


Epoch 3 Loss: 6.1038
Test Metrics: Precision=0.8383, Recall=0.8383, F1=0.8383

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 7...


Map: 100%|██████████| 80/80 [00:00<00:00, 6630.13 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4359.98 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9293.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.23it/s, loss=1.04]


Epoch 1 Loss: 11.6988
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.30it/s, loss=0.57]


Epoch 2 Loss: 7.2905
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.23it/s, loss=0.618]


Epoch 3 Loss: 5.6852
Test Metrics: Precision=0.8451, Recall=0.8451, F1=0.8451

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 8...


Map: 100%|██████████| 80/80 [00:00<00:00, 8047.21 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 3174.95 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 8973.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 11.87it/s, loss=0.711]


Epoch 1 Loss: 12.6204
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 11.62it/s, loss=0.647]


Epoch 2 Loss: 7.2274
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 11.49it/s, loss=0.806]


Epoch 3 Loss: 5.9561
Test Metrics: Precision=0.8322, Recall=0.8322, F1=0.8322

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 9...


Map: 100%|██████████| 80/80 [00:00<00:00, 7474.65 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4159.47 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9195.62 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 12.02it/s, loss=1.01]


Epoch 1 Loss: 12.3499
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 12.69it/s, loss=0.495]


Epoch 2 Loss: 7.3889
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 12.19it/s, loss=0.544]


Epoch 3 Loss: 6.0401
Test Metrics: Precision=0.8391, Recall=0.8391, F1=0.8391

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 80, Split 10...


Map: 100%|██████████| 80/80 [00:00<00:00, 6919.58 examples/s]
Map: 100%|██████████| 16/16 [00:00<00:00, 4118.88 examples/s]
Map: 100%|██████████| 1152/1152 [00:00<00:00, 9221.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 10/10 [00:00<00:00, 10.78it/s, loss=0.679]


Epoch 1 Loss: 12.3978
Epoch 2/3


Training Epoch 2: 100%|██████████| 10/10 [00:00<00:00, 10.68it/s, loss=0.706]


Epoch 2 Loss: 7.3898
Epoch 3/3


Training Epoch 3: 100%|██████████| 10/10 [00:00<00:00, 10.61it/s, loss=0.559]


Epoch 3 Loss: 6.0882
Test Metrics: Precision=0.7993, Recall=0.7993, F1=0.7993

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 1...


Map: 100%|██████████| 100/100 [00:00<00:00, 7982.00 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4343.05 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 8833.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.22it/s, loss=0.891]


Epoch 1 Loss: 14.8360
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.44it/s, loss=0.488]


Epoch 2 Loss: 8.5142
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.65it/s, loss=0.419]


Epoch 3 Loss: 6.7260
Test Metrics: Precision=0.8497, Recall=0.8497, F1=0.8497

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 2...


Map: 100%|██████████| 100/100 [00:00<00:00, 7915.13 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 5072.02 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9066.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.85it/s, loss=0.793]


Epoch 1 Loss: 13.8405
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 10.85it/s, loss=0.529]


Epoch 2 Loss: 7.7299
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.36it/s, loss=1.03] 


Epoch 3 Loss: 6.8124
Test Metrics: Precision=0.8553, Recall=0.8553, F1=0.8553

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 3...


Map: 100%|██████████| 100/100 [00:00<00:00, 8695.02 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 5361.50 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 3622.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 12.55it/s, loss=0.658]


Epoch 1 Loss: 14.7064
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:00<00:00, 13.52it/s, loss=0.358]


Epoch 2 Loss: 7.8083
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:00<00:00, 13.40it/s, loss=0.443]


Epoch 3 Loss: 6.3266
Test Metrics: Precision=0.8591, Recall=0.8591, F1=0.8591

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 4...


Map: 100%|██████████| 100/100 [00:00<00:00, 8238.83 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 3688.92 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9262.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.01it/s, loss=0.738]


Epoch 1 Loss: 14.5823
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.89it/s, loss=0.479]


Epoch 2 Loss: 8.0549
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.66it/s, loss=0.581]


Epoch 3 Loss: 6.1383
Test Metrics: Precision=0.8593, Recall=0.8593, F1=0.8593

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 5...


Map: 100%|██████████| 100/100 [00:00<00:00, 7880.33 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 6194.97 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9170.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.94it/s, loss=0.777]


Epoch 1 Loss: 14.2005
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.53it/s, loss=0.461]


Epoch 2 Loss: 8.0354
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.41it/s, loss=0.54] 


Epoch 3 Loss: 6.5437
Test Metrics: Precision=0.8440, Recall=0.8440, F1=0.8440

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 6...


Map: 100%|██████████| 100/100 [00:00<00:00, 7165.59 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4371.80 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9315.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 10.57it/s, loss=0.831]


Epoch 1 Loss: 14.9114
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00,  9.80it/s, loss=0.501]


Epoch 2 Loss: 7.9311
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 10.27it/s, loss=0.458]


Epoch 3 Loss: 6.4841
Test Metrics: Precision=0.8529, Recall=0.8529, F1=0.8529

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 7...


Map: 100%|██████████| 100/100 [00:00<00:00, 6930.56 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4196.82 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9265.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.71it/s, loss=1.02] 


Epoch 1 Loss: 14.2262
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.44it/s, loss=0.435]


Epoch 2 Loss: 7.7852
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.42it/s, loss=0.535]


Epoch 3 Loss: 6.5222
Test Metrics: Precision=0.8581, Recall=0.8581, F1=0.8581

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 8...


Map: 100%|██████████| 100/100 [00:00<00:00, 8765.89 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4188.65 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 8956.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 12.25it/s, loss=0.898]


Epoch 1 Loss: 15.2256
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:00<00:00, 13.33it/s, loss=0.329]


Epoch 2 Loss: 8.6702
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 12.95it/s, loss=0.375]


Epoch 3 Loss: 7.2029
Test Metrics: Precision=0.8492, Recall=0.8492, F1=0.8492

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 9...


Map: 100%|██████████| 100/100 [00:00<00:00, 7817.61 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4835.21 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 8929.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.77it/s, loss=1.27] 


Epoch 1 Loss: 16.4881
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 12.29it/s, loss=0.587]


Epoch 2 Loss: 9.2538
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.78it/s, loss=1.07] 


Epoch 3 Loss: 7.6640
Test Metrics: Precision=0.8401, Recall=0.8401, F1=0.8401

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 100, Split 10...


Map: 100%|██████████| 100/100 [00:00<00:00, 7929.19 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 4762.74 examples/s]
Map: 100%|██████████| 1128/1128 [00:00<00:00, 9277.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 13/13 [00:01<00:00, 11.51it/s, loss=0.984]


Epoch 1 Loss: 15.1338
Epoch 2/3


Training Epoch 2: 100%|██████████| 13/13 [00:01<00:00, 11.72it/s, loss=0.383]


Epoch 2 Loss: 8.4299
Epoch 3/3


Training Epoch 3: 100%|██████████| 13/13 [00:01<00:00, 11.59it/s, loss=0.743]


Epoch 3 Loss: 6.9669
Test Metrics: Precision=0.8435, Recall=0.8435, F1=0.8435

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 1...


Map: 100%|██████████| 120/120 [00:00<00:00, 8261.52 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4449.80 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9199.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 10.94it/s, loss=0.772]


Epoch 1 Loss: 15.7350
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.35it/s, loss=0.428]


Epoch 2 Loss: 8.9068
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.56it/s, loss=0.61] 


Epoch 3 Loss: 7.2084
Test Metrics: Precision=0.8559, Recall=0.8559, F1=0.8559

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 2...


Map: 100%|██████████| 120/120 [00:00<00:00, 8170.85 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 5332.59 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9200.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 10.73it/s, loss=0.68] 


Epoch 1 Loss: 16.9211
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.17it/s, loss=0.466]


Epoch 2 Loss: 8.6161
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 10.91it/s, loss=0.463]


Epoch 3 Loss: 7.1959
Test Metrics: Precision=0.8489, Recall=0.8489, F1=0.8489

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 3...


Map: 100%|██████████| 120/120 [00:00<00:00, 8044.83 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4339.12 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9221.24 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.91it/s, loss=0.768]


Epoch 1 Loss: 16.0203
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.85it/s, loss=0.338]


Epoch 2 Loss: 8.3442
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.84it/s, loss=0.586]


Epoch 3 Loss: 6.7265
Test Metrics: Precision=0.8645, Recall=0.8645, F1=0.8645

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 4...


Map: 100%|██████████| 120/120 [00:00<00:00, 8519.24 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 5757.12 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9163.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.31it/s, loss=0.862]


Epoch 1 Loss: 15.9637
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.72it/s, loss=0.472]


Epoch 2 Loss: 8.5773
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.43it/s, loss=0.436]


Epoch 3 Loss: 6.4275
Test Metrics: Precision=0.8622, Recall=0.8622, F1=0.8622

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 5...


Map: 100%|██████████| 120/120 [00:00<00:00, 7867.64 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 6490.64 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 8849.89 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.00it/s, loss=0.623]


Epoch 1 Loss: 16.1527
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 10.92it/s, loss=0.381]


Epoch 2 Loss: 8.1636
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.74it/s, loss=0.398]


Epoch 3 Loss: 6.5824
Test Metrics: Precision=0.8541, Recall=0.8541, F1=0.8541

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 6...


Map: 100%|██████████| 120/120 [00:00<00:00, 7813.41 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 5799.25 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9257.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 10.11it/s, loss=0.753]


Epoch 1 Loss: 17.0526
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00,  9.92it/s, loss=0.548]


Epoch 2 Loss: 8.9135
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00,  9.69it/s, loss=0.486]


Epoch 3 Loss: 7.6897
Test Metrics: Precision=0.8544, Recall=0.8544, F1=0.8544

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 7...


Map: 100%|██████████| 120/120 [00:00<00:00, 7652.56 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4285.73 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9436.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.49it/s, loss=0.694]


Epoch 1 Loss: 15.3043
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.50it/s, loss=0.533]


Epoch 2 Loss: 8.4040
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.37it/s, loss=0.349]


Epoch 3 Loss: 6.7527
Test Metrics: Precision=0.8664, Recall=0.8664, F1=0.8664

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 8...


Map: 100%|██████████| 120/120 [00:00<00:00, 8952.94 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 5111.89 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 3606.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 12.93it/s, loss=0.771]


Epoch 1 Loss: 17.6024
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 12.39it/s, loss=0.46] 


Epoch 2 Loss: 9.1348
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 12.44it/s, loss=0.603]


Epoch 3 Loss: 7.3741
Test Metrics: Precision=0.8613, Recall=0.8613, F1=0.8613

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 9...


Map: 100%|██████████| 120/120 [00:00<00:00, 8272.65 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4196.58 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9190.77 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.49it/s, loss=0.813]


Epoch 1 Loss: 16.6131
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.62it/s, loss=0.535]


Epoch 2 Loss: 9.0592
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.53it/s, loss=0.486]


Epoch 3 Loss: 7.4173
Test Metrics: Precision=0.8575, Recall=0.8575, F1=0.8575

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 120, Split 10...


Map: 100%|██████████| 120/120 [00:00<00:00, 8154.18 examples/s]
Map: 100%|██████████| 24/24 [00:00<00:00, 4263.41 examples/s]
Map: 100%|██████████| 1104/1104 [00:00<00:00, 9198.35 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 15/15 [00:01<00:00, 11.10it/s, loss=0.688]


Epoch 1 Loss: 15.3517
Epoch 2/3


Training Epoch 2: 100%|██████████| 15/15 [00:01<00:00, 11.76it/s, loss=0.522]


Epoch 2 Loss: 8.5843
Epoch 3/3


Training Epoch 3: 100%|██████████| 15/15 [00:01<00:00, 11.53it/s, loss=0.459]


Epoch 3 Loss: 7.1080
Test Metrics: Precision=0.8557, Recall=0.8557, F1=0.8557

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 1...


Map: 100%|██████████| 140/140 [00:00<00:00, 8458.82 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5230.50 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9377.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.25it/s, loss=0.644]


Epoch 1 Loss: 17.9040
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.59it/s, loss=0.11] 


Epoch 2 Loss: 8.8351
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.58it/s, loss=0.113]


Epoch 3 Loss: 7.1876
Test Metrics: Precision=0.8637, Recall=0.8637, F1=0.8637

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 2...


Map: 100%|██████████| 140/140 [00:00<00:00, 8486.81 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5420.50 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9362.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 12.52it/s, loss=0.369]


Epoch 1 Loss: 17.6798
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.65it/s, loss=0.311]


Epoch 2 Loss: 9.4998
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.71it/s, loss=0.289]


Epoch 3 Loss: 7.9575
Test Metrics: Precision=0.8660, Recall=0.8660, F1=0.8660

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 3...


Map: 100%|██████████| 140/140 [00:00<00:00, 8488.53 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5857.38 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 8980.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 12.01it/s, loss=0.432]


Epoch 1 Loss: 17.4852
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 12.18it/s, loss=0.522]


Epoch 2 Loss: 8.9225
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.73it/s, loss=0.416]


Epoch 3 Loss: 6.9656
Test Metrics: Precision=0.8733, Recall=0.8733, F1=0.8733

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 4...


Map: 100%|██████████| 140/140 [00:00<00:00, 8844.08 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5207.54 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9363.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.89it/s, loss=0.684]


Epoch 1 Loss: 18.2347
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.94it/s, loss=0.359]


Epoch 2 Loss: 8.9978
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.94it/s, loss=0.391]


Epoch 3 Loss: 6.8105
Test Metrics: Precision=0.8680, Recall=0.8680, F1=0.8680

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 5...


Map: 100%|██████████| 140/140 [00:00<00:00, 8013.14 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5651.88 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9357.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.33it/s, loss=0.623]


Epoch 1 Loss: 17.9854
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.62it/s, loss=0.474]


Epoch 2 Loss: 9.3391
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.53it/s, loss=0.352]


Epoch 3 Loss: 7.4887
Test Metrics: Precision=0.8614, Recall=0.8614, F1=0.8614

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 6...


Map: 100%|██████████| 140/140 [00:00<00:00, 8093.99 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5369.20 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9481.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.42it/s, loss=0.782]


Epoch 1 Loss: 18.2636
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 10.39it/s, loss=0.384]


Epoch 2 Loss: 10.3012
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.08it/s, loss=0.386]


Epoch 3 Loss: 7.9003
Test Metrics: Precision=0.8635, Recall=0.8635, F1=0.8635

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 7...


Map: 100%|██████████| 140/140 [00:00<00:00, 7757.38 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 6690.24 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9396.91 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.65it/s, loss=0.584]


Epoch 1 Loss: 19.0366
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.81it/s, loss=0.591]


Epoch 2 Loss: 10.3819
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 12.01it/s, loss=0.31] 


Epoch 3 Loss: 8.0605
Test Metrics: Precision=0.8668, Recall=0.8668, F1=0.8668

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 8...


Map: 100%|██████████| 140/140 [00:00<00:00, 8900.78 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5308.77 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9114.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.83it/s, loss=0.486]


Epoch 1 Loss: 18.0058
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 12.02it/s, loss=0.96] 


Epoch 2 Loss: 9.6995
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.69it/s, loss=0.596]


Epoch 3 Loss: 8.1793
Test Metrics: Precision=0.8655, Recall=0.8655, F1=0.8655

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 9...


Map: 100%|██████████| 140/140 [00:00<00:00, 8702.65 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 5104.78 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9289.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 11.24it/s, loss=0.562]


Epoch 1 Loss: 18.9360
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.89it/s, loss=0.487]


Epoch 2 Loss: 10.3687
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 12.46it/s, loss=0.474]


Epoch 3 Loss: 8.4122
Test Metrics: Precision=0.8672, Recall=0.8672, F1=0.8672

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 140, Split 10...


Map: 100%|██████████| 140/140 [00:00<00:00, 8238.55 examples/s]
Map: 100%|██████████| 28/28 [00:00<00:00, 6425.59 examples/s]
Map: 100%|██████████| 1080/1080 [00:00<00:00, 9035.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 18/18 [00:01<00:00, 12.18it/s, loss=0.344]


Epoch 1 Loss: 16.8337
Epoch 2/3


Training Epoch 2: 100%|██████████| 18/18 [00:01<00:00, 11.79it/s, loss=0.514]


Epoch 2 Loss: 8.9320
Epoch 3/3


Training Epoch 3: 100%|██████████| 18/18 [00:01<00:00, 11.43it/s, loss=0.435]


Epoch 3 Loss: 6.8626
Test Metrics: Precision=0.8671, Recall=0.8671, F1=0.8671

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 1...


Map: 100%|██████████| 160/160 [00:00<00:00, 8302.16 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5119.30 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9373.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.14it/s, loss=0.574]


Epoch 1 Loss: 18.4742
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.62it/s, loss=0.668]


Epoch 2 Loss: 9.2490
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.36it/s, loss=0.371]


Epoch 3 Loss: 7.6022
Test Metrics: Precision=0.8695, Recall=0.8695, F1=0.8695

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 2...


Map: 100%|██████████| 160/160 [00:00<00:00, 8452.10 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5253.35 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9179.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 10.53it/s, loss=0.434]


Epoch 1 Loss: 20.5944
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 10.49it/s, loss=0.353]


Epoch 2 Loss: 10.6170
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 10.45it/s, loss=0.484]


Epoch 3 Loss: 8.8387
Test Metrics: Precision=0.8630, Recall=0.8630, F1=0.8630

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 3...


Map: 100%|██████████| 160/160 [00:00<00:00, 8778.48 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5904.09 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 3421.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.94it/s, loss=0.706]


Epoch 1 Loss: 19.8816
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.08it/s, loss=0.528]


Epoch 2 Loss: 10.3260
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 12.23it/s, loss=0.366]


Epoch 3 Loss: 7.8906
Test Metrics: Precision=0.8669, Recall=0.8669, F1=0.8669

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 4...


Map: 100%|██████████| 160/160 [00:00<00:00, 8946.89 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5279.59 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9051.86 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.52it/s, loss=0.504]


Epoch 1 Loss: 17.8478
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.97it/s, loss=0.521]


Epoch 2 Loss: 8.5882
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 12.37it/s, loss=0.357]


Epoch 3 Loss: 6.9173
Test Metrics: Precision=0.8740, Recall=0.8740, F1=0.8740

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 5...


Map: 100%|██████████| 160/160 [00:00<00:00, 8547.92 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5532.70 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9222.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.37it/s, loss=0.485]


Epoch 1 Loss: 19.5082
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.27it/s, loss=0.462]


Epoch 2 Loss: 11.0507
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 10.93it/s, loss=0.4]  


Epoch 3 Loss: 8.8561
Test Metrics: Precision=0.8630, Recall=0.8630, F1=0.8630

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 6...


Map: 100%|██████████| 160/160 [00:00<00:00, 8615.96 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 4385.20 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9559.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 10.33it/s, loss=0.45] 


Epoch 1 Loss: 19.1502
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 10.83it/s, loss=0.484]


Epoch 2 Loss: 10.2364
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.04it/s, loss=0.263]


Epoch 3 Loss: 8.1659
Test Metrics: Precision=0.8680, Recall=0.8680, F1=0.8680

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 7...


Map: 100%|██████████| 160/160 [00:00<00:00, 8253.97 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5156.67 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9230.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.21it/s, loss=0.667]


Epoch 1 Loss: 19.0388
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 11.64it/s, loss=0.319]


Epoch 2 Loss: 10.0241
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.56it/s, loss=0.397]


Epoch 3 Loss: 8.2144
Test Metrics: Precision=0.8756, Recall=0.8756, F1=0.8756

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 8...


Map: 100%|██████████| 160/160 [00:00<00:00, 8956.81 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 4785.80 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9217.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.88it/s, loss=0.537]


Epoch 1 Loss: 18.5036
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.34it/s, loss=0.328]


Epoch 2 Loss: 10.1016
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.57it/s, loss=0.479]


Epoch 3 Loss: 8.3663
Test Metrics: Precision=0.8720, Recall=0.8720, F1=0.8720

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 9...


Map: 100%|██████████| 160/160 [00:00<00:00, 8352.38 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5602.44 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 8741.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 11.99it/s, loss=0.569]


Epoch 1 Loss: 19.5665
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 12.39it/s, loss=0.355]


Epoch 2 Loss: 10.1725
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 11.79it/s, loss=0.483]


Epoch 3 Loss: 7.8506
Test Metrics: Precision=0.8741, Recall=0.8741, F1=0.8741

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 160, Split 10...


Map: 100%|██████████| 160/160 [00:00<00:00, 7982.31 examples/s]
Map: 100%|██████████| 32/32 [00:00<00:00, 5614.86 examples/s]
Map: 100%|██████████| 1056/1056 [00:00<00:00, 9277.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 20/20 [00:01<00:00, 10.84it/s, loss=0.604]


Epoch 1 Loss: 20.0202
Epoch 2/3


Training Epoch 2: 100%|██████████| 20/20 [00:01<00:00, 10.62it/s, loss=0.525]


Epoch 2 Loss: 10.6077
Epoch 3/3


Training Epoch 3: 100%|██████████| 20/20 [00:01<00:00, 10.44it/s, loss=0.424]


Epoch 3 Loss: 8.5024
Test Metrics: Precision=0.8707, Recall=0.8707, F1=0.8707

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 1...


Map: 100%|██████████| 180/180 [00:00<00:00, 8485.05 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6196.44 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9422.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 11.72it/s, loss=0.294]


Epoch 1 Loss: 19.1109
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.30it/s, loss=0.457]


Epoch 2 Loss: 10.1818
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 11.14it/s, loss=0.56] 


Epoch 3 Loss: 8.4254
Test Metrics: Precision=0.8715, Recall=0.8715, F1=0.8715

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 2...


Map: 100%|██████████| 180/180 [00:00<00:00, 8715.54 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5655.88 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9331.04 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.15it/s, loss=0.785]


Epoch 1 Loss: 22.9872
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.34it/s, loss=0.303]


Epoch 2 Loss: 11.6111
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 10.89it/s, loss=0.217]


Epoch 3 Loss: 9.3418
Test Metrics: Precision=0.8738, Recall=0.8738, F1=0.8738

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 3...


Map: 100%|██████████| 180/180 [00:00<00:00, 8494.89 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6398.90 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9245.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 11.59it/s, loss=0.763]


Epoch 1 Loss: 21.3023
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.50it/s, loss=0.279]


Epoch 2 Loss: 10.5300
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 11.63it/s, loss=0.337]


Epoch 3 Loss: 8.5184
Test Metrics: Precision=0.8711, Recall=0.8711, F1=0.8711

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 4...


Map: 100%|██████████| 180/180 [00:00<00:00, 9102.33 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5714.53 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9049.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 12.52it/s, loss=0.708]


Epoch 1 Loss: 20.2288
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 12.23it/s, loss=0.414]


Epoch 2 Loss: 10.0260
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 12.24it/s, loss=0.349]


Epoch 3 Loss: 7.9499
Test Metrics: Precision=0.8733, Recall=0.8733, F1=0.8733

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 5...


Map: 100%|██████████| 180/180 [00:00<00:00, 8324.97 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6139.00 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9265.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 12.29it/s, loss=0.84] 


Epoch 1 Loss: 22.3177
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 11.89it/s, loss=0.742]


Epoch 2 Loss: 11.7389
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 11.27it/s, loss=0.345]


Epoch 3 Loss: 9.5396
Test Metrics: Precision=0.8668, Recall=0.8668, F1=0.8668

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 6...


Map: 100%|██████████| 180/180 [00:00<00:00, 8675.78 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5809.29 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9361.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 11.55it/s, loss=0.502]


Epoch 1 Loss: 20.5065
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 11.09it/s, loss=0.245]


Epoch 2 Loss: 10.9034
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 10.85it/s, loss=0.352]


Epoch 3 Loss: 8.3810
Test Metrics: Precision=0.8769, Recall=0.8769, F1=0.8769

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 7...


Map: 100%|██████████| 180/180 [00:00<00:00, 8007.37 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5444.59 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9351.26 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.12it/s, loss=0.892]


Epoch 1 Loss: 20.3345
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 11.73it/s, loss=0.416]


Epoch 2 Loss: 10.5449
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 11.38it/s, loss=0.311]


Epoch 3 Loss: 8.5645
Test Metrics: Precision=0.8815, Recall=0.8815, F1=0.8815

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 8...


Map: 100%|██████████| 180/180 [00:00<00:00, 9116.51 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 6244.62 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9292.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 11.68it/s, loss=0.571]


Epoch 1 Loss: 22.4129
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 11.71it/s, loss=0.473]


Epoch 2 Loss: 12.3155
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 11.54it/s, loss=0.637]


Epoch 3 Loss: 10.2599
Test Metrics: Precision=0.8727, Recall=0.8727, F1=0.8727

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 9...


Map: 100%|██████████| 180/180 [00:00<00:00, 8986.19 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5268.12 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9009.38 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:01<00:00, 12.05it/s, loss=0.551]


Epoch 1 Loss: 22.4720
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:01<00:00, 12.27it/s, loss=0.851]


Epoch 2 Loss: 11.5058
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:01<00:00, 11.66it/s, loss=0.478]


Epoch 3 Loss: 9.0798
Test Metrics: Precision=0.8692, Recall=0.8692, F1=0.8692

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 180, Split 10...


Map: 100%|██████████| 180/180 [00:00<00:00, 7601.51 examples/s]
Map: 100%|██████████| 36/36 [00:00<00:00, 5152.71 examples/s]
Map: 100%|██████████| 1032/1032 [00:00<00:00, 9114.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 23/23 [00:02<00:00, 11.00it/s, loss=0.712]


Epoch 1 Loss: 20.4543
Epoch 2/3


Training Epoch 2: 100%|██████████| 23/23 [00:02<00:00, 10.79it/s, loss=0.343]


Epoch 2 Loss: 10.4033
Epoch 3/3


Training Epoch 3: 100%|██████████| 23/23 [00:02<00:00, 11.05it/s, loss=0.541]


Epoch 3 Loss: 8.3278
Test Metrics: Precision=0.8768, Recall=0.8768, F1=0.8768

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 1...


Map: 100%|██████████| 200/200 [00:00<00:00, 7920.28 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6259.45 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 8766.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.99it/s, loss=0.752]


Epoch 1 Loss: 21.9060
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.03it/s, loss=0.306]


Epoch 2 Loss: 11.2290
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 10.83it/s, loss=0.416]


Epoch 3 Loss: 8.7736
Test Metrics: Precision=0.8751, Recall=0.8751, F1=0.8751

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 2...


Map: 100%|██████████| 200/200 [00:00<00:00, 8768.37 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6539.30 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 8986.42 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.00it/s, loss=0.607]


Epoch 1 Loss: 22.3873
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.33it/s, loss=0.429]


Epoch 2 Loss: 11.1750
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.10it/s, loss=0.522]


Epoch 3 Loss: 9.1968
Test Metrics: Precision=0.8743, Recall=0.8743, F1=0.8743

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 3...


Map: 100%|██████████| 200/200 [00:00<00:00, 7936.17 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 5872.73 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9215.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.69it/s, loss=0.399]


Epoch 1 Loss: 22.8829
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.45it/s, loss=0.396]


Epoch 2 Loss: 11.0828
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.26it/s, loss=0.235]


Epoch 3 Loss: 8.5243
Test Metrics: Precision=0.8845, Recall=0.8845, F1=0.8845

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 4...


Map: 100%|██████████| 200/200 [00:00<00:00, 8425.93 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6305.80 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 8960.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.68it/s, loss=0.512]


Epoch 1 Loss: 22.0341
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 12.14it/s, loss=0.352]


Epoch 2 Loss: 10.0616
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.65it/s, loss=0.264]


Epoch 3 Loss: 7.6711
Test Metrics: Precision=0.8829, Recall=0.8829, F1=0.8829

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 5...


Map: 100%|██████████| 200/200 [00:00<00:00, 8511.69 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6461.72 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9225.15 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.05it/s, loss=0.601]


Epoch 1 Loss: 23.9653
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.12it/s, loss=0.599]


Epoch 2 Loss: 12.7766
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.23it/s, loss=0.254]


Epoch 3 Loss: 9.5071
Test Metrics: Precision=0.8786, Recall=0.8786, F1=0.8786

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 6...


Map: 100%|██████████| 200/200 [00:00<00:00, 8269.93 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6047.15 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9190.22 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.43it/s, loss=0.431]


Epoch 1 Loss: 22.7328
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 10.57it/s, loss=0.363]


Epoch 2 Loss: 11.1388
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 10.51it/s, loss=0.257]


Epoch 3 Loss: 9.1650
Test Metrics: Precision=0.8799, Recall=0.8799, F1=0.8799

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 7...


Map: 100%|██████████| 200/200 [00:00<00:00, 8036.37 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 5758.24 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9423.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.65it/s, loss=0.46] 


Epoch 1 Loss: 22.3984
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.01it/s, loss=0.453]


Epoch 2 Loss: 11.4946
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.02it/s, loss=0.374]


Epoch 3 Loss: 8.9392
Test Metrics: Precision=0.8830, Recall=0.8830, F1=0.8830

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 8...


Map: 100%|██████████| 200/200 [00:00<00:00, 8965.25 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6681.75 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 8929.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.45it/s, loss=0.609]


Epoch 1 Loss: 21.6733
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.61it/s, loss=0.417]


Epoch 2 Loss: 12.1762
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 11.22it/s, loss=0.207]


Epoch 3 Loss: 9.5535
Test Metrics: Precision=0.8817, Recall=0.8817, F1=0.8817

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 9...


Map: 100%|██████████| 200/200 [00:00<00:00, 8804.16 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 6437.42 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 8587.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 11.87it/s, loss=0.648]


Epoch 1 Loss: 23.6723
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 11.47it/s, loss=0.626]


Epoch 2 Loss: 11.5752
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 12.25it/s, loss=0.291]


Epoch 3 Loss: 9.0424
Test Metrics: Precision=0.8747, Recall=0.8747, F1=0.8747

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 200, Split 10...


Map: 100%|██████████| 200/200 [00:00<00:00, 7810.40 examples/s]
Map: 100%|██████████| 40/40 [00:00<00:00, 5532.47 examples/s]
Map: 100%|██████████| 1008/1008 [00:00<00:00, 9071.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 25/25 [00:02<00:00, 10.46it/s, loss=0.465]


Epoch 1 Loss: 22.0894
Epoch 2/3


Training Epoch 2: 100%|██████████| 25/25 [00:02<00:00, 10.63it/s, loss=0.44] 


Epoch 2 Loss: 11.1656
Epoch 3/3


Training Epoch 3: 100%|██████████| 25/25 [00:02<00:00, 10.65it/s, loss=0.45] 


Epoch 3 Loss: 9.0430
Test Metrics: Precision=0.8760, Recall=0.8760, F1=0.8760

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 1...


Map: 100%|██████████| 220/220 [00:00<00:00, 8490.96 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6834.40 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 8817.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.87it/s, loss=0.414]


Epoch 1 Loss: 24.6063
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.62it/s, loss=0.46] 


Epoch 2 Loss: 11.9836
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.66it/s, loss=0.614]


Epoch 3 Loss: 9.6336
Test Metrics: Precision=0.8760, Recall=0.8760, F1=0.8760

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 2...


Map: 100%|██████████| 220/220 [00:00<00:00, 9155.23 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 5128.22 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9045.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.24it/s, loss=0.37] 


Epoch 1 Loss: 22.2839
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.24it/s, loss=0.228]


Epoch 2 Loss: 12.4003
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.25it/s, loss=0.149]


Epoch 3 Loss: 9.7011
Test Metrics: Precision=0.8813, Recall=0.8813, F1=0.8813

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 3...


Map: 100%|██████████| 220/220 [00:00<00:00, 8695.32 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6368.38 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9242.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 10.90it/s, loss=0.568]


Epoch 1 Loss: 22.9279
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 10.94it/s, loss=0.333]


Epoch 2 Loss: 11.6362
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 10.96it/s, loss=0.242]


Epoch 3 Loss: 8.9496
Test Metrics: Precision=0.8894, Recall=0.8894, F1=0.8894

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 4...


Map: 100%|██████████| 220/220 [00:00<00:00, 9638.55 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6760.55 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9273.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 12.00it/s, loss=0.403]


Epoch 1 Loss: 21.8574
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 12.24it/s, loss=0.54] 


Epoch 2 Loss: 10.3362
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 12.78it/s, loss=0.295]


Epoch 3 Loss: 8.1228
Test Metrics: Precision=0.8898, Recall=0.8898, F1=0.8898

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 5...


Map: 100%|██████████| 220/220 [00:00<00:00, 8933.64 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6142.84 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9393.03 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.46it/s, loss=0.538]


Epoch 1 Loss: 23.0158
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.72it/s, loss=0.53] 


Epoch 2 Loss: 13.1394
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.46it/s, loss=0.334]


Epoch 3 Loss: 10.2664
Test Metrics: Precision=0.8853, Recall=0.8853, F1=0.8853

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 6...


Map: 100%|██████████| 220/220 [00:00<00:00, 8781.63 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6380.49 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9567.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.08it/s, loss=0.419]


Epoch 1 Loss: 24.1002
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 10.54it/s, loss=0.985]


Epoch 2 Loss: 12.7358
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 10.46it/s, loss=0.146]


Epoch 3 Loss: 9.9295
Test Metrics: Precision=0.8801, Recall=0.8801, F1=0.8801

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 7...


Map: 100%|██████████| 220/220 [00:00<00:00, 8274.94 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6036.15 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9502.88 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 10.57it/s, loss=0.313]


Epoch 1 Loss: 21.9173
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 10.74it/s, loss=0.506]


Epoch 2 Loss: 11.6228
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.12it/s, loss=0.334]


Epoch 3 Loss: 9.5292
Test Metrics: Precision=0.8890, Recall=0.8890, F1=0.8890

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 8...


Map: 100%|██████████| 220/220 [00:00<00:00, 8535.81 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 5922.07 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 8749.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 11.70it/s, loss=0.733]


Epoch 1 Loss: 24.8107
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.72it/s, loss=0.539]


Epoch 2 Loss: 12.8284
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.51it/s, loss=0.597]


Epoch 3 Loss: 10.5388
Test Metrics: Precision=0.8893, Recall=0.8893, F1=0.8893

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 9...


Map: 100%|██████████| 220/220 [00:00<00:00, 8924.57 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 6199.80 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 8677.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 12.50it/s, loss=0.582]


Epoch 1 Loss: 24.4933
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 11.98it/s, loss=0.387]


Epoch 2 Loss: 12.4461
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 11.80it/s, loss=0.143]


Epoch 3 Loss: 9.7260
Test Metrics: Precision=0.8793, Recall=0.8793, F1=0.8793

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 220, Split 10...


Map: 100%|██████████| 220/220 [00:00<00:00, 7716.50 examples/s]
Map: 100%|██████████| 44/44 [00:00<00:00, 5303.75 examples/s]
Map: 100%|██████████| 984/984 [00:00<00:00, 9421.70 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 28/28 [00:02<00:00, 10.19it/s, loss=0.209]


Epoch 1 Loss: 22.6522
Epoch 2/3


Training Epoch 2: 100%|██████████| 28/28 [00:02<00:00, 10.10it/s, loss=0.728]


Epoch 2 Loss: 12.1321
Epoch 3/3


Training Epoch 3: 100%|██████████| 28/28 [00:02<00:00, 10.40it/s, loss=0.305]


Epoch 3 Loss: 9.3537
Test Metrics: Precision=0.8856, Recall=0.8856, F1=0.8856

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 1...


Map: 100%|██████████| 240/240 [00:00<00:00, 8311.79 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6975.97 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9117.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.40it/s, loss=0.324]


Epoch 1 Loss: 24.4863
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.44it/s, loss=0.305]


Epoch 2 Loss: 12.3785
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.40it/s, loss=0.316]


Epoch 3 Loss: 9.9645
Test Metrics: Precision=0.8812, Recall=0.8812, F1=0.8812

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 2...


Map: 100%|██████████| 240/240 [00:00<00:00, 8874.25 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6119.72 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9039.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.87it/s, loss=0.579]


Epoch 1 Loss: 24.2101
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.98it/s, loss=0.435]


Epoch 2 Loss: 12.8468
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.09it/s, loss=0.149]


Epoch 3 Loss: 10.5005
Test Metrics: Precision=0.8819, Recall=0.8819, F1=0.8819

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 3...


Map: 100%|██████████| 240/240 [00:00<00:00, 8348.74 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6334.01 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9319.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.92it/s, loss=0.573]


Epoch 1 Loss: 22.3454
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.06it/s, loss=0.3]  


Epoch 2 Loss: 11.1486
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.00it/s, loss=0.217]


Epoch 3 Loss: 8.5764
Test Metrics: Precision=0.8942, Recall=0.8942, F1=0.8942

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 4...


Map: 100%|██████████| 240/240 [00:00<00:00, 8879.03 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5604.86 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9144.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.59it/s, loss=0.467]


Epoch 1 Loss: 24.0147
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 12.82it/s, loss=0.442]


Epoch 2 Loss: 11.3709
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.84it/s, loss=0.329]


Epoch 3 Loss: 8.7868
Test Metrics: Precision=0.8900, Recall=0.8900, F1=0.8900

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 5...


Map: 100%|██████████| 240/240 [00:00<00:00, 8998.16 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 5971.25 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9143.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.94it/s, loss=0.549]


Epoch 1 Loss: 25.0676
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.92it/s, loss=0.396]


Epoch 2 Loss: 13.8597
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.11it/s, loss=0.349]


Epoch 3 Loss: 10.6900
Test Metrics: Precision=0.8881, Recall=0.8881, F1=0.8881

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 6...


Map: 100%|██████████| 240/240 [00:00<00:00, 7896.96 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6616.93 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9281.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.69it/s, loss=0.534]


Epoch 1 Loss: 26.9684
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.61it/s, loss=0.426]


Epoch 2 Loss: 14.5770
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 10.99it/s, loss=0.274]


Epoch 3 Loss: 11.4930
Test Metrics: Precision=0.8794, Recall=0.8794, F1=0.8794

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 7...


Map: 100%|██████████| 240/240 [00:00<00:00, 8028.40 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6672.63 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9645.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.56it/s, loss=0.453]


Epoch 1 Loss: 23.7926
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.62it/s, loss=0.375]


Epoch 2 Loss: 12.6775
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.00it/s, loss=0.384]


Epoch 3 Loss: 10.0361
Test Metrics: Precision=0.8961, Recall=0.8961, F1=0.8961

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 8...


Map: 100%|██████████| 240/240 [00:00<00:00, 9387.43 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 7171.03 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9205.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.47it/s, loss=0.564]


Epoch 1 Loss: 26.3817
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.21it/s, loss=0.498]


Epoch 2 Loss: 13.9610
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.36it/s, loss=0.443]


Epoch 3 Loss: 11.2462
Test Metrics: Precision=0.8883, Recall=0.8883, F1=0.8883

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 9...


Map: 100%|██████████| 240/240 [00:00<00:00, 8767.28 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 4940.04 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9231.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 11.73it/s, loss=0.694]


Epoch 1 Loss: 24.9332
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 11.78it/s, loss=0.417]


Epoch 2 Loss: 12.3463
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 11.60it/s, loss=0.445]


Epoch 3 Loss: 9.7510
Test Metrics: Precision=0.8879, Recall=0.8879, F1=0.8879

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 240, Split 10...


Map: 100%|██████████| 240/240 [00:00<00:00, 8139.41 examples/s]
Map: 100%|██████████| 48/48 [00:00<00:00, 6769.10 examples/s]
Map: 100%|██████████| 960/960 [00:00<00:00, 9494.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 30/30 [00:02<00:00, 10.39it/s, loss=0.637]


Epoch 1 Loss: 26.4921
Epoch 2/3


Training Epoch 2: 100%|██████████| 30/30 [00:02<00:00, 10.56it/s, loss=0.36] 


Epoch 2 Loss: 13.7812
Epoch 3/3


Training Epoch 3: 100%|██████████| 30/30 [00:02<00:00, 10.39it/s, loss=0.311]


Epoch 3 Loss: 10.5222
Test Metrics: Precision=0.8843, Recall=0.8843, F1=0.8843

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 1...


Map: 100%|██████████| 260/260 [00:00<00:00, 8981.90 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 5375.32 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9509.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.33it/s, loss=0.344]


Epoch 1 Loss: 24.5847
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.68it/s, loss=0.196]


Epoch 2 Loss: 12.1096
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.52it/s, loss=0.396]


Epoch 3 Loss: 9.1939
Test Metrics: Precision=0.8964, Recall=0.8964, F1=0.8964

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 2...


Map: 100%|██████████| 260/260 [00:00<00:00, 8682.34 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6985.80 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9363.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:03<00:00, 10.83it/s, loss=0.387]


Epoch 1 Loss: 25.8687
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.41it/s, loss=0.404]


Epoch 2 Loss: 12.8939
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.60it/s, loss=0.339]


Epoch 3 Loss: 9.8656
Test Metrics: Precision=0.8911, Recall=0.8911, F1=0.8911

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 3...


Map: 100%|██████████| 260/260 [00:00<00:00, 8988.19 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6912.74 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9480.21 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.48it/s, loss=0.669]


Epoch 1 Loss: 25.4772
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.79it/s, loss=0.334]


Epoch 2 Loss: 12.8816
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.30it/s, loss=0.168]


Epoch 3 Loss: 9.6844
Test Metrics: Precision=0.8913, Recall=0.8913, F1=0.8913

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 4...


Map: 100%|██████████| 260/260 [00:00<00:00, 9449.66 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6960.83 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9030.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 12.20it/s, loss=0.329]


Epoch 1 Loss: 23.7463
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 12.18it/s, loss=0.231]


Epoch 2 Loss: 11.5445
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 12.35it/s, loss=0.236]


Epoch 3 Loss: 8.4406
Test Metrics: Precision=0.8993, Recall=0.8993, F1=0.8993

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 5...


Map: 100%|██████████| 260/260 [00:00<00:00, 8401.40 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 7346.28 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 8900.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.72it/s, loss=0.496]


Epoch 1 Loss: 27.5378
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.19it/s, loss=0.414]


Epoch 2 Loss: 13.9132
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:03<00:00, 10.89it/s, loss=0.21] 


Epoch 3 Loss: 11.0527
Test Metrics: Precision=0.8941, Recall=0.8941, F1=0.8941

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 6...


Map: 100%|██████████| 260/260 [00:00<00:00, 8851.76 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 6782.68 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9233.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:03<00:00, 10.59it/s, loss=0.224]


Epoch 1 Loss: 26.1430
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:03<00:00, 10.84it/s, loss=0.264]


Epoch 2 Loss: 12.7191
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:03<00:00, 10.77it/s, loss=0.234]


Epoch 3 Loss: 9.9824
Test Metrics: Precision=0.8967, Recall=0.8967, F1=0.8967

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 7...


Map: 100%|██████████| 260/260 [00:00<00:00, 7385.64 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 5872.32 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9008.46 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:03<00:00, 10.80it/s, loss=0.29] 


Epoch 1 Loss: 26.3179
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.23it/s, loss=0.438]


Epoch 2 Loss: 13.3713
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:03<00:00, 10.95it/s, loss=0.341]


Epoch 3 Loss: 10.4066
Test Metrics: Precision=0.8961, Recall=0.8961, F1=0.8961

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 8...


Map: 100%|██████████| 260/260 [00:00<00:00, 9402.65 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 7112.00 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 8813.34 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.66it/s, loss=0.417]


Epoch 1 Loss: 27.6590
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.44it/s, loss=0.365]


Epoch 2 Loss: 14.0608
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.57it/s, loss=0.349]


Epoch 3 Loss: 11.0607
Test Metrics: Precision=0.8969, Recall=0.8969, F1=0.8969

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 9...


Map: 100%|██████████| 260/260 [00:00<00:00, 8741.36 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 7175.65 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 8862.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:02<00:00, 11.89it/s, loss=0.566]


Epoch 1 Loss: 24.8313
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:02<00:00, 11.45it/s, loss=0.38] 


Epoch 2 Loss: 13.3192
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:02<00:00, 11.76it/s, loss=0.285]


Epoch 3 Loss: 10.4253
Test Metrics: Precision=0.8892, Recall=0.8892, F1=0.8892

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 260, Split 10...


Map: 100%|██████████| 260/260 [00:00<00:00, 8474.13 examples/s]
Map: 100%|██████████| 52/52 [00:00<00:00, 7108.76 examples/s]
Map: 100%|██████████| 936/936 [00:00<00:00, 9311.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 33/33 [00:03<00:00, 10.76it/s, loss=0.461]


Epoch 1 Loss: 26.6069
Epoch 2/3


Training Epoch 2: 100%|██████████| 33/33 [00:03<00:00, 10.87it/s, loss=0.393]


Epoch 2 Loss: 13.4319
Epoch 3/3


Training Epoch 3: 100%|██████████| 33/33 [00:03<00:00, 10.82it/s, loss=0.367]


Epoch 3 Loss: 10.3319
Test Metrics: Precision=0.8915, Recall=0.8915, F1=0.8915

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 1...


Map: 100%|██████████| 280/280 [00:00<00:00, 8466.80 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6717.03 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9215.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.28it/s, loss=0.467]


Epoch 1 Loss: 28.6636
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.27it/s, loss=0.318]


Epoch 2 Loss: 14.0665
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.46it/s, loss=0.3]  


Epoch 3 Loss: 10.8090
Test Metrics: Precision=0.8894, Recall=0.8894, F1=0.8894

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 2...


Map: 100%|██████████| 280/280 [00:00<00:00, 8105.33 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 5817.34 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9365.68 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.04it/s, loss=0.324]


Epoch 1 Loss: 27.8427
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.45it/s, loss=0.532]


Epoch 2 Loss: 13.6197
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.03it/s, loss=0.294]


Epoch 3 Loss: 10.4684
Test Metrics: Precision=0.8915, Recall=0.8915, F1=0.8915

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 3...


Map: 100%|██████████| 280/280 [00:00<00:00, 8864.17 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6860.24 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9376.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:02<00:00, 11.77it/s, loss=0.437]


Epoch 1 Loss: 28.7461
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.19it/s, loss=0.326]


Epoch 2 Loss: 13.5489
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:02<00:00, 11.81it/s, loss=0.236]


Epoch 3 Loss: 10.3192
Test Metrics: Precision=0.8900, Recall=0.8900, F1=0.8900

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 4...


Map: 100%|██████████| 280/280 [00:00<00:00, 9651.35 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7526.55 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9263.82 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:02<00:00, 12.40it/s, loss=0.495]


Epoch 1 Loss: 25.1035
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:02<00:00, 12.42it/s, loss=0.312]


Epoch 2 Loss: 12.2711
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:02<00:00, 12.09it/s, loss=0.236]


Epoch 3 Loss: 9.5773
Test Metrics: Precision=0.8967, Recall=0.8967, F1=0.8967

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 5...


Map: 100%|██████████| 280/280 [00:00<00:00, 7562.27 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 5024.84 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 7807.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.11it/s, loss=0.597]


Epoch 1 Loss: 29.1171
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.16it/s, loss=0.232]


Epoch 2 Loss: 14.9966
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 10.72it/s, loss=0.241]


Epoch 3 Loss: 11.6096
Test Metrics: Precision=0.8905, Recall=0.8905, F1=0.8905

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 6...


Map: 100%|██████████| 280/280 [00:00<00:00, 8366.56 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7552.93 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 2999.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 10.88it/s, loss=0.397]


Epoch 1 Loss: 26.8542
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 10.98it/s, loss=0.389]


Epoch 2 Loss: 13.7571
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.22it/s, loss=0.373]


Epoch 3 Loss: 10.7876
Test Metrics: Precision=0.8966, Recall=0.8966, F1=0.8966

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 7...


Map: 100%|██████████| 280/280 [00:00<00:00, 8075.90 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7296.48 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9480.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 10.96it/s, loss=0.445]


Epoch 1 Loss: 28.4069
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.13it/s, loss=0.511]


Epoch 2 Loss: 14.0413
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 10.83it/s, loss=0.269]


Epoch 3 Loss: 11.0797
Test Metrics: Precision=0.8974, Recall=0.8974, F1=0.8974

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 8...


Map: 100%|██████████| 280/280 [00:00<00:00, 8777.91 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 7095.03 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9138.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 11.38it/s, loss=0.455]


Epoch 1 Loss: 27.5292
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.51it/s, loss=0.414]


Epoch 2 Loss: 14.6622
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.44it/s, loss=0.324]


Epoch 3 Loss: 11.1791
Test Metrics: Precision=0.8995, Recall=0.8995, F1=0.8995

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 9...


Map: 100%|██████████| 280/280 [00:00<00:00, 8658.70 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 4832.84 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9088.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:02<00:00, 11.70it/s, loss=0.474]


Epoch 1 Loss: 27.2389
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 11.22it/s, loss=0.364]


Epoch 2 Loss: 14.0123
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 11.17it/s, loss=0.226]


Epoch 3 Loss: 10.9627
Test Metrics: Precision=0.8932, Recall=0.8932, F1=0.8932

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 280, Split 10...


Map: 100%|██████████| 280/280 [00:00<00:00, 8515.37 examples/s]
Map: 100%|██████████| 56/56 [00:00<00:00, 6215.59 examples/s]
Map: 100%|██████████| 912/912 [00:00<00:00, 9344.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 35/35 [00:03<00:00, 10.97it/s, loss=0.414]


Epoch 1 Loss: 25.8502
Epoch 2/3


Training Epoch 2: 100%|██████████| 35/35 [00:03<00:00, 10.74it/s, loss=0.526]


Epoch 2 Loss: 13.4004
Epoch 3/3


Training Epoch 3: 100%|██████████| 35/35 [00:03<00:00, 10.92it/s, loss=0.211]


Epoch 3 Loss: 10.4114
Test Metrics: Precision=0.8948, Recall=0.8948, F1=0.8948

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 1...


Map: 100%|██████████| 300/300 [00:00<00:00, 8735.83 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7327.58 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9232.58 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.81it/s, loss=0.55] 


Epoch 1 Loss: 30.1290
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.58it/s, loss=0.39] 


Epoch 2 Loss: 14.8382
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.69it/s, loss=0.509]


Epoch 3 Loss: 11.5910
Test Metrics: Precision=0.8989, Recall=0.8989, F1=0.8989

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 2...


Map: 100%|██████████| 300/300 [00:00<00:00, 8790.08 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6119.94 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9176.98 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.12it/s, loss=0.409]


Epoch 1 Loss: 29.5178
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.75it/s, loss=0.315]


Epoch 2 Loss: 15.0226
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.59it/s, loss=0.191]


Epoch 3 Loss: 11.4195
Test Metrics: Precision=0.8942, Recall=0.8942, F1=0.8942

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 3...


Map: 100%|██████████| 300/300 [00:00<00:00, 8775.43 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7009.20 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9297.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.49it/s, loss=0.529]


Epoch 1 Loss: 26.5508
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.68it/s, loss=0.268]


Epoch 2 Loss: 13.4822
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.82it/s, loss=0.331]


Epoch 3 Loss: 10.1507
Test Metrics: Precision=0.8968, Recall=0.8968, F1=0.8968

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 4...


Map: 100%|██████████| 300/300 [00:00<00:00, 9916.39 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6991.87 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 8963.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:02<00:00, 12.69it/s, loss=0.485]


Epoch 1 Loss: 26.3721
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 12.60it/s, loss=0.374]


Epoch 2 Loss: 12.8140
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 12.62it/s, loss=0.277]


Epoch 3 Loss: 9.5638
Test Metrics: Precision=0.9008, Recall=0.9008, F1=0.9008

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 5...


Map: 100%|██████████| 300/300 [00:00<00:00, 8883.04 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7651.05 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9172.14 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.12it/s, loss=0.423]


Epoch 1 Loss: 29.1768
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.15it/s, loss=0.843]


Epoch 2 Loss: 15.3011
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.28it/s, loss=0.193]


Epoch 3 Loss: 11.8366
Test Metrics: Precision=0.8964, Recall=0.8964, F1=0.8964

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 6...


Map: 100%|██████████| 300/300 [00:00<00:00, 8609.06 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6890.22 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9040.61 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 10.56it/s, loss=0.304]


Epoch 1 Loss: 25.9107
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 10.95it/s, loss=0.446]


Epoch 2 Loss: 14.3718
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 10.95it/s, loss=0.315]


Epoch 3 Loss: 10.9274
Test Metrics: Precision=0.8982, Recall=0.8982, F1=0.8982

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 7...


Map: 100%|██████████| 300/300 [00:00<00:00, 8184.06 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 7445.29 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 9230.27 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 10.91it/s, loss=0.405]


Epoch 1 Loss: 26.7458
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.31it/s, loss=0.648]


Epoch 2 Loss: 13.8631
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.05it/s, loss=0.29] 


Epoch 3 Loss: 10.7849
Test Metrics: Precision=0.9063, Recall=0.9063, F1=0.9063

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 8...


Map: 100%|██████████| 300/300 [00:00<00:00, 8468.32 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6300.43 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 8854.32 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.51it/s, loss=0.453]


Epoch 1 Loss: 28.5642
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.54it/s, loss=0.315]


Epoch 2 Loss: 15.5867
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.44it/s, loss=0.401]


Epoch 3 Loss: 12.1616
Test Metrics: Precision=0.9003, Recall=0.9003, F1=0.9003

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 9...


Map: 100%|██████████| 300/300 [00:00<00:00, 9298.84 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6799.92 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 3113.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 11.86it/s, loss=0.278]


Epoch 1 Loss: 29.4655
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 11.77it/s, loss=0.595]


Epoch 2 Loss: 15.3092
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 12.12it/s, loss=0.541]


Epoch 3 Loss: 11.8827
Test Metrics: Precision=0.8949, Recall=0.8949, F1=0.8949

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 300, Split 10...


Map: 100%|██████████| 300/300 [00:00<00:00, 8177.41 examples/s]
Map: 100%|██████████| 60/60 [00:00<00:00, 6969.40 examples/s]
Map: 100%|██████████| 888/888 [00:00<00:00, 8979.45 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 38/38 [00:03<00:00, 10.87it/s, loss=0.272]


Epoch 1 Loss: 29.4072
Epoch 2/3


Training Epoch 2: 100%|██████████| 38/38 [00:03<00:00, 10.95it/s, loss=0.281]


Epoch 2 Loss: 14.8305
Epoch 3/3


Training Epoch 3: 100%|██████████| 38/38 [00:03<00:00, 11.24it/s, loss=0.372]


Epoch 3 Loss: 11.5264
Test Metrics: Precision=0.8934, Recall=0.8934, F1=0.8934

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 1...


Map: 100%|██████████| 320/320 [00:00<00:00, 8906.23 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7023.98 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9456.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.73it/s, loss=0.534]


Epoch 1 Loss: 28.0601
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.69it/s, loss=0.176]


Epoch 2 Loss: 13.9434
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.91it/s, loss=0.264]


Epoch 3 Loss: 10.2959
Test Metrics: Precision=0.9054, Recall=0.9054, F1=0.9054

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 2...


Map: 100%|██████████| 320/320 [00:00<00:00, 9136.55 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7324.10 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9346.66 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.18it/s, loss=0.548]


Epoch 1 Loss: 29.5102
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.30it/s, loss=0.39] 


Epoch 2 Loss: 14.2020
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.68it/s, loss=0.145]


Epoch 3 Loss: 11.0028
Test Metrics: Precision=0.9018, Recall=0.9018, F1=0.9018

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 3...


Map: 100%|██████████| 320/320 [00:00<00:00, 8879.01 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7052.77 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9232.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.23it/s, loss=0.536]


Epoch 1 Loss: 27.0393
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.18it/s, loss=0.252]


Epoch 2 Loss: 13.2259
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.23it/s, loss=0.292] 


Epoch 3 Loss: 9.7981
Test Metrics: Precision=0.9040, Recall=0.9040, F1=0.9040

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 4...


Map: 100%|██████████| 320/320 [00:00<00:00, 9427.06 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6786.04 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 8898.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 12.79it/s, loss=0.55] 


Epoch 1 Loss: 26.6615
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 12.76it/s, loss=0.33] 


Epoch 2 Loss: 13.0126
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 12.36it/s, loss=0.259]


Epoch 3 Loss: 9.3937
Test Metrics: Precision=0.9042, Recall=0.9042, F1=0.9042

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 5...


Map: 100%|██████████| 320/320 [00:00<00:00, 8787.39 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7167.45 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9187.94 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 10.92it/s, loss=0.386]


Epoch 1 Loss: 29.1632
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 10.56it/s, loss=0.413]


Epoch 2 Loss: 15.2010
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 10.55it/s, loss=0.376]


Epoch 3 Loss: 11.9812
Test Metrics: Precision=0.9002, Recall=0.9002, F1=0.9002

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 6...


Map: 100%|██████████| 320/320 [00:00<00:00, 8929.34 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7909.82 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9207.31 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 10.92it/s, loss=0.412]


Epoch 1 Loss: 29.2360
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.03it/s, loss=0.241]


Epoch 2 Loss: 14.8867
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 10.72it/s, loss=0.229]


Epoch 3 Loss: 11.3363
Test Metrics: Precision=0.9030, Recall=0.9030, F1=0.9030

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 7...


Map: 100%|██████████| 320/320 [00:00<00:00, 8704.02 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7433.00 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9363.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.07it/s, loss=0.374]


Epoch 1 Loss: 28.0100
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.22it/s, loss=0.214]


Epoch 2 Loss: 14.6237
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.27it/s, loss=0.236]


Epoch 3 Loss: 10.9705
Test Metrics: Precision=0.9066, Recall=0.9066, F1=0.9066

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 8...


Map: 100%|██████████| 320/320 [00:00<00:00, 9032.09 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7309.14 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 8974.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.20it/s, loss=0.512]


Epoch 1 Loss: 30.2869
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.33it/s, loss=0.428]


Epoch 2 Loss: 15.5314
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.68it/s, loss=0.352]


Epoch 3 Loss: 11.6413
Test Metrics: Precision=0.9033, Recall=0.9033, F1=0.9033

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 9...


Map: 100%|██████████| 320/320 [00:00<00:00, 8854.41 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 6971.44 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9048.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 11.43it/s, loss=0.453]


Epoch 1 Loss: 30.5300
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 11.52it/s, loss=0.475]


Epoch 2 Loss: 14.7864
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.73it/s, loss=0.329]


Epoch 3 Loss: 10.9750
Test Metrics: Precision=0.8963, Recall=0.8963, F1=0.8963

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 320, Split 10...


Map: 100%|██████████| 320/320 [00:00<00:00, 8495.49 examples/s]
Map: 100%|██████████| 64/64 [00:00<00:00, 7030.79 examples/s]
Map: 100%|██████████| 864/864 [00:00<00:00, 9177.23 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 40/40 [00:03<00:00, 10.87it/s, loss=0.386]


Epoch 1 Loss: 27.6037
Epoch 2/3


Training Epoch 2: 100%|██████████| 40/40 [00:03<00:00, 10.95it/s, loss=0.369]


Epoch 2 Loss: 13.7637
Epoch 3/3


Training Epoch 3: 100%|██████████| 40/40 [00:03<00:00, 11.09it/s, loss=0.433]


Epoch 3 Loss: 10.4424
Test Metrics: Precision=0.8972, Recall=0.8972, F1=0.8972

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 1...


Map: 100%|██████████| 340/340 [00:00<00:00, 8563.66 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7404.27 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9247.96 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.77it/s, loss=0.614]


Epoch 1 Loss: 30.0118
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.87it/s, loss=0.293]


Epoch 2 Loss: 14.9621
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.65it/s, loss=0.229]


Epoch 3 Loss: 11.2708
Test Metrics: Precision=0.9045, Recall=0.9045, F1=0.9045

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 2...


Map: 100%|██████████| 340/340 [00:00<00:00, 8821.97 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7749.50 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9122.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.29it/s, loss=0.408]


Epoch 1 Loss: 29.2720
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.72it/s, loss=0.307]


Epoch 2 Loss: 15.4438
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.42it/s, loss=0.26] 


Epoch 3 Loss: 11.2992
Test Metrics: Precision=0.9004, Recall=0.9004, F1=0.9004

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 3...


Map: 100%|██████████| 340/340 [00:00<00:00, 1512.45 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7448.17 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9364.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.24it/s, loss=0.202]


Epoch 1 Loss: 27.1831
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.37it/s, loss=0.383]


Epoch 2 Loss: 14.0917
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.26it/s, loss=0.147]


Epoch 3 Loss: 10.2120
Test Metrics: Precision=0.9067, Recall=0.9067, F1=0.9067

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 4...


Map: 100%|██████████| 340/340 [00:00<00:00, 9964.46 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6700.80 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9285.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 12.53it/s, loss=0.49] 


Epoch 1 Loss: 29.8569
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 12.52it/s, loss=0.31] 


Epoch 2 Loss: 14.4959
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 13.03it/s, loss=0.226]


Epoch 3 Loss: 10.9896
Test Metrics: Precision=0.9011, Recall=0.9011, F1=0.9011

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 5...


Map: 100%|██████████| 340/340 [00:00<00:00, 8655.45 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6981.10 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9247.60 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.17it/s, loss=0.561]


Epoch 1 Loss: 30.8284
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.38it/s, loss=0.654]


Epoch 2 Loss: 16.3044
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.15it/s, loss=0.193]


Epoch 3 Loss: 12.0075
Test Metrics: Precision=0.9051, Recall=0.9051, F1=0.9051

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 6...


Map: 100%|██████████| 340/340 [00:00<00:00, 9200.65 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7375.36 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9481.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.11it/s, loss=0.322]


Epoch 1 Loss: 31.2310
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.03it/s, loss=0.327]


Epoch 2 Loss: 16.0053
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.26it/s, loss=0.212]


Epoch 3 Loss: 12.1968
Test Metrics: Precision=0.9059, Recall=0.9059, F1=0.9059

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 7...


Map: 100%|██████████| 340/340 [00:00<00:00, 8876.01 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7531.96 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9366.79 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.38it/s, loss=0.397]


Epoch 1 Loss: 30.2770
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.21it/s, loss=0.476]


Epoch 2 Loss: 15.5228
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.74it/s, loss=0.328]


Epoch 3 Loss: 11.8715
Test Metrics: Precision=0.9046, Recall=0.9046, F1=0.9046

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 8...


Map: 100%|██████████| 340/340 [00:00<00:00, 9232.93 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 6637.33 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9285.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.18it/s, loss=0.436]


Epoch 1 Loss: 30.3463
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.39it/s, loss=0.288]


Epoch 2 Loss: 15.7190
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.87it/s, loss=0.188]


Epoch 3 Loss: 12.1578
Test Metrics: Precision=0.9031, Recall=0.9031, F1=0.9031

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 9...


Map: 100%|██████████| 340/340 [00:00<00:00, 9600.41 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7164.89 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9080.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 11.85it/s, loss=0.421]


Epoch 1 Loss: 31.3899
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.67it/s, loss=0.568]


Epoch 2 Loss: 16.3776
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.47it/s, loss=0.235]


Epoch 3 Loss: 12.1211
Test Metrics: Precision=0.8961, Recall=0.8961, F1=0.8961

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 340, Split 10...


Map: 100%|██████████| 340/340 [00:00<00:00, 8620.60 examples/s]
Map: 100%|██████████| 68/68 [00:00<00:00, 7764.48 examples/s]
Map: 100%|██████████| 840/840 [00:00<00:00, 9370.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 43/43 [00:03<00:00, 10.79it/s, loss=0.462]


Epoch 1 Loss: 27.6114
Epoch 2/3


Training Epoch 2: 100%|██████████| 43/43 [00:03<00:00, 11.39it/s, loss=0.238]


Epoch 2 Loss: 14.0667
Epoch 3/3


Training Epoch 3: 100%|██████████| 43/43 [00:03<00:00, 11.55it/s, loss=0.165]


Epoch 3 Loss: 10.1912
Test Metrics: Precision=0.9072, Recall=0.9072, F1=0.9072

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 1...


Map: 100%|██████████| 360/360 [00:00<00:00, 8747.90 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7052.21 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9493.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 11.75it/s, loss=0.468]


Epoch 1 Loss: 30.7415
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.42it/s, loss=0.217]


Epoch 2 Loss: 15.3405
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.62it/s, loss=0.29] 


Epoch 3 Loss: 11.6413
Test Metrics: Precision=0.9055, Recall=0.9055, F1=0.9055

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 2...


Map: 100%|██████████| 360/360 [00:00<00:00, 9561.30 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6057.97 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9624.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 11.31it/s, loss=0.506]


Epoch 1 Loss: 31.5298
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.62it/s, loss=0.295]


Epoch 2 Loss: 15.8232
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.55it/s, loss=0.456]


Epoch 3 Loss: 11.8065
Test Metrics: Precision=0.8975, Recall=0.8975, F1=0.8975

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 3...


Map: 100%|██████████| 360/360 [00:00<00:00, 9014.78 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8011.40 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9541.36 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.16it/s, loss=0.316]


Epoch 1 Loss: 31.3606
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:04<00:00, 11.13it/s, loss=0.422]


Epoch 2 Loss: 15.5043
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 11.02it/s, loss=0.185]


Epoch 3 Loss: 11.3586
Test Metrics: Precision=0.9033, Recall=0.9033, F1=0.9033

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 4...


Map: 100%|██████████| 360/360 [00:00<00:00, 9762.20 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6666.74 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 8947.43 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 12.57it/s, loss=0.341]


Epoch 1 Loss: 30.4234
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 12.10it/s, loss=0.141]


Epoch 2 Loss: 14.4060
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 12.36it/s, loss=0.209]


Epoch 3 Loss: 10.4395
Test Metrics: Precision=0.9061, Recall=0.9061, F1=0.9061

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 5...


Map: 100%|██████████| 360/360 [00:00<00:00, 8856.68 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6617.65 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9195.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 10.80it/s, loss=0.436]


Epoch 1 Loss: 33.4472
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.33it/s, loss=0.277]


Epoch 2 Loss: 15.5171
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 10.83it/s, loss=0.32] 


Epoch 3 Loss: 11.9814
Test Metrics: Precision=0.9067, Recall=0.9067, F1=0.9067

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 6...


Map: 100%|██████████| 360/360 [00:00<00:00, 8846.20 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7415.71 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 8933.65 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 10.73it/s, loss=0.525]


Epoch 1 Loss: 32.3015
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:04<00:00, 11.16it/s, loss=0.435]


Epoch 2 Loss: 16.9708
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 11.20it/s, loss=0.169]


Epoch 3 Loss: 12.8856
Test Metrics: Precision=0.9031, Recall=0.9031, F1=0.9031

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 7...


Map: 100%|██████████| 360/360 [00:00<00:00, 8843.97 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 8675.63 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 2874.80 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:03<00:00, 11.42it/s, loss=0.423]


Epoch 1 Loss: 32.6686
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.51it/s, loss=0.339]


Epoch 2 Loss: 16.1588
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.26it/s, loss=0.141]


Epoch 3 Loss: 11.6849
Test Metrics: Precision=0.9055, Recall=0.9055, F1=0.9055

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 8...


Map: 100%|██████████| 360/360 [00:00<00:00, 8916.36 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 6920.98 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9174.54 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.21it/s, loss=0.513]


Epoch 1 Loss: 31.6685
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:03<00:00, 11.61it/s, loss=0.385]


Epoch 2 Loss: 16.0747
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.79it/s, loss=0.151]


Epoch 3 Loss: 12.2455
Test Metrics: Precision=0.9091, Recall=0.9091, F1=0.9091

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 9...


Map: 100%|██████████| 360/360 [00:00<00:00, 9264.74 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7389.22 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9208.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.18it/s, loss=0.418]


Epoch 1 Loss: 33.1000
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:04<00:00, 11.08it/s, loss=0.265]


Epoch 2 Loss: 16.5849
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:03<00:00, 11.33it/s, loss=0.347]


Epoch 3 Loss: 12.1021
Test Metrics: Precision=0.9044, Recall=0.9044, F1=0.9044

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 360, Split 10...


Map: 100%|██████████| 360/360 [00:00<00:00, 8564.61 examples/s]
Map: 100%|██████████| 72/72 [00:00<00:00, 7754.66 examples/s]
Map: 100%|██████████| 816/816 [00:00<00:00, 9411.90 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 45/45 [00:04<00:00, 11.21it/s, loss=0.358]


Epoch 1 Loss: 29.4674
Epoch 2/3


Training Epoch 2: 100%|██████████| 45/45 [00:04<00:00, 11.19it/s, loss=0.309]


Epoch 2 Loss: 16.2179
Epoch 3/3


Training Epoch 3: 100%|██████████| 45/45 [00:04<00:00, 11.04it/s, loss=0.175]


Epoch 3 Loss: 12.3729
Test Metrics: Precision=0.8995, Recall=0.8995, F1=0.8995

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 1...


Map: 100%|██████████| 380/380 [00:00<00:00, 8489.68 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7037.73 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9214.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.83it/s, loss=0.373]


Epoch 1 Loss: 32.3860
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:03<00:00, 12.07it/s, loss=0.377]


Epoch 2 Loss: 16.1588
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 12.31it/s, loss=0.235]


Epoch 3 Loss: 11.6803
Test Metrics: Precision=0.9089, Recall=0.9089, F1=0.9089

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 2...


Map: 100%|██████████| 380/380 [00:00<00:00, 9111.95 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6571.70 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9434.83 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.59it/s, loss=0.312]


Epoch 1 Loss: 32.2564
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.54it/s, loss=0.425]


Epoch 2 Loss: 16.7231
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.47it/s, loss=0.15] 


Epoch 3 Loss: 12.6845
Test Metrics: Precision=0.9034, Recall=0.9034, F1=0.9034

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 3...


Map: 100%|██████████| 380/380 [00:00<00:00, 8895.31 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7585.90 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9495.29 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.18it/s, loss=0.277]


Epoch 1 Loss: 33.5500
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.05it/s, loss=0.205]


Epoch 2 Loss: 15.4141
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 10.88it/s, loss=0.137] 


Epoch 3 Loss: 11.4598
Test Metrics: Precision=0.9092, Recall=0.9092, F1=0.9092

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 4...


Map: 100%|██████████| 380/380 [00:00<00:00, 9220.06 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7654.21 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9046.09 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.68it/s, loss=0.386]


Epoch 1 Loss: 33.3145
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.97it/s, loss=0.312]


Epoch 2 Loss: 15.3948
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.99it/s, loss=0.254]


Epoch 3 Loss: 11.4439
Test Metrics: Precision=0.9075, Recall=0.9075, F1=0.9075

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 5...


Map: 100%|██████████| 380/380 [00:00<00:00, 8986.44 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 8247.96 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9241.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.14it/s, loss=0.383]


Epoch 1 Loss: 33.3618
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.55it/s, loss=0.36] 


Epoch 2 Loss: 17.0344
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.15it/s, loss=0.387]


Epoch 3 Loss: 13.0482
Test Metrics: Precision=0.9070, Recall=0.9070, F1=0.9070

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 6...


Map: 100%|██████████| 380/380 [00:00<00:00, 8718.15 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7559.81 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9349.37 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 10.92it/s, loss=0.597]


Epoch 1 Loss: 33.4488
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 10.70it/s, loss=0.283]


Epoch 2 Loss: 17.3482
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.06it/s, loss=0.189]


Epoch 3 Loss: 12.4821
Test Metrics: Precision=0.9064, Recall=0.9064, F1=0.9064

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 7...


Map: 100%|██████████| 380/380 [00:00<00:00, 9181.87 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6925.20 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9532.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.06it/s, loss=0.375]


Epoch 1 Loss: 32.9836
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.31it/s, loss=0.358]


Epoch 2 Loss: 17.6383
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.21it/s, loss=0.492]


Epoch 3 Loss: 13.3206
Test Metrics: Precision=0.9022, Recall=0.9022, F1=0.9022

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 8...


Map: 100%|██████████| 380/380 [00:00<00:00, 9466.32 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7262.37 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9470.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.66it/s, loss=0.535]


Epoch 1 Loss: 32.9981
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.91it/s, loss=0.281]


Epoch 2 Loss: 16.3208
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:03<00:00, 12.02it/s, loss=0.138] 


Epoch 3 Loss: 11.9794
Test Metrics: Precision=0.9089, Recall=0.9089, F1=0.9089

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 9...


Map: 100%|██████████| 380/380 [00:00<00:00, 9182.09 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 6842.85 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9387.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.46it/s, loss=0.565]


Epoch 1 Loss: 30.9052
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.53it/s, loss=0.234]


Epoch 2 Loss: 15.3651
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.76it/s, loss=0.159] 


Epoch 3 Loss: 10.9008
Test Metrics: Precision=0.9074, Recall=0.9074, F1=0.9074

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 380, Split 10...


Map: 100%|██████████| 380/380 [00:00<00:00, 8600.40 examples/s]
Map: 100%|██████████| 76/76 [00:00<00:00, 7449.40 examples/s]
Map: 100%|██████████| 792/792 [00:00<00:00, 9403.92 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 48/48 [00:04<00:00, 11.46it/s, loss=0.24] 


Epoch 1 Loss: 33.3255
Epoch 2/3


Training Epoch 2: 100%|██████████| 48/48 [00:04<00:00, 11.67it/s, loss=0.282]


Epoch 2 Loss: 16.6310
Epoch 3/3


Training Epoch 3: 100%|██████████| 48/48 [00:04<00:00, 11.25it/s, loss=0.204]


Epoch 3 Loss: 12.1322
Test Metrics: Precision=0.9083, Recall=0.9083, F1=0.9083

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 1...


Map: 100%|██████████| 400/400 [00:00<00:00, 8626.04 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7236.86 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9196.56 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.79it/s, loss=0.285]


Epoch 1 Loss: 32.9473
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 12.04it/s, loss=0.213]


Epoch 2 Loss: 16.0850
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 12.02it/s, loss=0.141]


Epoch 3 Loss: 11.8960
Test Metrics: Precision=0.9059, Recall=0.9059, F1=0.9059

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 2...


Map: 100%|██████████| 400/400 [00:00<00:00, 8912.63 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7556.96 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9211.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.55it/s, loss=0.352]


Epoch 1 Loss: 32.6809
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.41it/s, loss=0.291]


Epoch 2 Loss: 16.9144
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.62it/s, loss=0.223]


Epoch 3 Loss: 12.5708
Test Metrics: Precision=0.9052, Recall=0.9052, F1=0.9052

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 3...


Map: 100%|██████████| 400/400 [00:00<00:00, 8607.76 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6337.84 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9631.53 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 10.77it/s, loss=0.254]


Epoch 1 Loss: 31.7913
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.28it/s, loss=0.199]


Epoch 2 Loss: 16.0968
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.29it/s, loss=0.24] 


Epoch 3 Loss: 11.5046
Test Metrics: Precision=0.9098, Recall=0.9098, F1=0.9098

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 4...


Map: 100%|██████████| 400/400 [00:00<00:00, 9346.64 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7440.83 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9276.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.36it/s, loss=0.353]


Epoch 1 Loss: 32.5910
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.86it/s, loss=0.264]


Epoch 2 Loss: 15.3373
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.57it/s, loss=0.237] 


Epoch 3 Loss: 11.2963
Test Metrics: Precision=0.9047, Recall=0.9047, F1=0.9047

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 5...


Map: 100%|██████████| 400/400 [00:00<00:00, 9147.98 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6991.09 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9290.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.13it/s, loss=0.351]


Epoch 1 Loss: 33.2613
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 10.81it/s, loss=0.264]


Epoch 2 Loss: 17.0325
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.78it/s, loss=0.194]


Epoch 3 Loss: 12.2524
Test Metrics: Precision=0.9139, Recall=0.9139, F1=0.9139

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 6...


Map: 100%|██████████| 400/400 [00:00<00:00, 9038.82 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 6641.55 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9505.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 10.66it/s, loss=0.371]


Epoch 1 Loss: 32.1316
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 10.81it/s, loss=0.296]


Epoch 2 Loss: 16.1536
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.48it/s, loss=0.164]


Epoch 3 Loss: 11.7855
Test Metrics: Precision=0.9133, Recall=0.9133, F1=0.9133

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 7...


Map: 100%|██████████| 400/400 [00:00<00:00, 9003.94 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 8060.93 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9352.28 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.51it/s, loss=0.353]


Epoch 1 Loss: 33.5748
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.06it/s, loss=0.352]


Epoch 2 Loss: 16.3108
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.36it/s, loss=0.259]


Epoch 3 Loss: 11.9900
Test Metrics: Precision=0.9107, Recall=0.9107, F1=0.9107

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 8...


Map: 100%|██████████| 400/400 [00:00<00:00, 9115.72 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7255.48 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9068.25 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 12.04it/s, loss=0.363]


Epoch 1 Loss: 34.7503
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.85it/s, loss=0.399]


Epoch 2 Loss: 16.7672
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.51it/s, loss=0.259]


Epoch 3 Loss: 12.8672
Test Metrics: Precision=0.9107, Recall=0.9107, F1=0.9107

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 9...


Map: 100%|██████████| 400/400 [00:00<00:00, 9215.36 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7542.36 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9261.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 11.30it/s, loss=0.333]


Epoch 1 Loss: 34.8428
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.45it/s, loss=0.259]


Epoch 2 Loss: 16.8927
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 10.93it/s, loss=0.406]


Epoch 3 Loss: 13.0268
Test Metrics: Precision=0.9044, Recall=0.9044, F1=0.9044

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 400, Split 10...


Map: 100%|██████████| 400/400 [00:00<00:00, 8774.65 examples/s]
Map: 100%|██████████| 80/80 [00:00<00:00, 7867.39 examples/s]
Map: 100%|██████████| 768/768 [00:00<00:00, 9197.19 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 50/50 [00:04<00:00, 10.96it/s, loss=0.548]


Epoch 1 Loss: 32.4773
Epoch 2/3


Training Epoch 2: 100%|██████████| 50/50 [00:04<00:00, 11.06it/s, loss=0.222]


Epoch 2 Loss: 15.9468
Epoch 3/3


Training Epoch 3: 100%|██████████| 50/50 [00:04<00:00, 11.18it/s, loss=0.242]


Epoch 3 Loss: 11.6950
Test Metrics: Precision=0.9064, Recall=0.9064, F1=0.9064

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 1...


Map: 100%|██████████| 420/420 [00:00<00:00, 9103.68 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 8929.48 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9259.06 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.81it/s, loss=0.415]


Epoch 1 Loss: 36.6001
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.88it/s, loss=0.406]


Epoch 2 Loss: 18.0572
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.92it/s, loss=0.788]


Epoch 3 Loss: 14.0082
Test Metrics: Precision=0.9053, Recall=0.9053, F1=0.9053

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 2...


Map: 100%|██████████| 420/420 [00:00<00:00, 8896.33 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7146.92 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9385.63 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.72it/s, loss=0.297]


Epoch 1 Loss: 38.8155
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 12.01it/s, loss=0.291]


Epoch 2 Loss: 19.9846
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.95it/s, loss=0.299] 


Epoch 3 Loss: 14.8483
Test Metrics: Precision=0.9034, Recall=0.9034, F1=0.9034

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 3...


Map: 100%|██████████| 420/420 [00:00<00:00, 8643.64 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7446.77 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9659.33 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 10.83it/s, loss=0.334]


Epoch 1 Loss: 34.5532
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.31it/s, loss=0.183]


Epoch 2 Loss: 16.8687
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.31it/s, loss=0.118] 


Epoch 3 Loss: 12.5250
Test Metrics: Precision=0.9099, Recall=0.9099, F1=0.9099

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 4...


Map: 100%|██████████| 420/420 [00:00<00:00, 9456.57 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7712.14 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9086.11 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.73it/s, loss=0.398]


Epoch 1 Loss: 34.6007
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.72it/s, loss=0.23] 


Epoch 2 Loss: 15.9387
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 12.07it/s, loss=0.328]


Epoch 3 Loss: 11.9133
Test Metrics: Precision=0.9095, Recall=0.9095, F1=0.9095

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 5...


Map: 100%|██████████| 420/420 [00:00<00:00, 9066.71 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7958.29 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9222.20 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 10.78it/s, loss=0.276]


Epoch 1 Loss: 36.0618
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 10.99it/s, loss=0.326]


Epoch 2 Loss: 18.2717
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.36it/s, loss=0.27] 


Epoch 3 Loss: 13.2833
Test Metrics: Precision=0.9102, Recall=0.9102, F1=0.9102

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 6...


Map: 100%|██████████| 420/420 [00:00<00:00, 8799.59 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7711.64 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9618.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.08it/s, loss=0.356]


Epoch 1 Loss: 36.0247
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:05<00:00, 10.54it/s, loss=0.196]


Epoch 2 Loss: 18.3140
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 10.93it/s, loss=0.233]


Epoch 3 Loss: 13.6061
Test Metrics: Precision=0.9101, Recall=0.9101, F1=0.9101

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 7...


Map: 100%|██████████| 420/420 [00:00<00:00, 8797.96 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7638.90 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9303.84 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.59it/s, loss=0.385]


Epoch 1 Loss: 37.0565
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.28it/s, loss=0.228]


Epoch 2 Loss: 17.8837
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.74it/s, loss=0.114]


Epoch 3 Loss: 13.0700
Test Metrics: Precision=0.9089, Recall=0.9089, F1=0.9089

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 8...


Map: 100%|██████████| 420/420 [00:00<00:00, 9546.51 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 6341.39 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9533.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.86it/s, loss=0.432]


Epoch 1 Loss: 36.0361
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 12.08it/s, loss=0.322]


Epoch 2 Loss: 18.5772
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.85it/s, loss=0.345]


Epoch 3 Loss: 13.4807
Test Metrics: Precision=0.9101, Recall=0.9101, F1=0.9101

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 9...


Map: 100%|██████████| 420/420 [00:00<00:00, 9368.81 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7247.48 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9346.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.29it/s, loss=0.409]


Epoch 1 Loss: 34.7687
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.77it/s, loss=0.289]


Epoch 2 Loss: 18.0091
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.77it/s, loss=0.259]


Epoch 3 Loss: 12.8306
Test Metrics: Precision=0.9049, Recall=0.9049, F1=0.9049

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 420, Split 10...


Map: 100%|██████████| 420/420 [00:00<00:00, 9171.70 examples/s]
Map: 100%|██████████| 84/84 [00:00<00:00, 7150.98 examples/s]
Map: 100%|██████████| 744/744 [00:00<00:00, 9727.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 53/53 [00:04<00:00, 11.36it/s, loss=0.28] 


Epoch 1 Loss: 38.1675
Epoch 2/3


Training Epoch 2: 100%|██████████| 53/53 [00:04<00:00, 11.34it/s, loss=0.318]


Epoch 2 Loss: 18.0876
Epoch 3/3


Training Epoch 3: 100%|██████████| 53/53 [00:04<00:00, 11.05it/s, loss=0.209]


Epoch 3 Loss: 13.2820
Test Metrics: Precision=0.9068, Recall=0.9068, F1=0.9068

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 1...


Map: 100%|██████████| 440/440 [00:00<00:00, 8922.11 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7561.79 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9255.57 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.30it/s, loss=0.289]


Epoch 1 Loss: 34.6444
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.69it/s, loss=0.547]


Epoch 2 Loss: 17.5780
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.67it/s, loss=0.189]


Epoch 3 Loss: 12.2912
Test Metrics: Precision=0.9117, Recall=0.9117, F1=0.9117

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 2...


Map: 100%|██████████| 440/440 [00:00<00:00, 9392.64 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7743.93 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9323.01 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.27it/s, loss=0.424]


Epoch 1 Loss: 33.6129
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.53it/s, loss=0.219]


Epoch 2 Loss: 15.9884
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.60it/s, loss=0.174] 


Epoch 3 Loss: 11.4937
Test Metrics: Precision=0.9097, Recall=0.9097, F1=0.9097

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 3...


Map: 100%|██████████| 440/440 [00:00<00:00, 8889.32 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8138.35 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9574.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:05<00:00, 10.79it/s, loss=0.359]


Epoch 1 Loss: 35.4139
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:05<00:00, 10.68it/s, loss=0.282]


Epoch 2 Loss: 16.8668
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:05<00:00, 10.62it/s, loss=0.226] 


Epoch 3 Loss: 11.8958
Test Metrics: Precision=0.9157, Recall=0.9157, F1=0.9157

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 4...


Map: 100%|██████████| 440/440 [00:00<00:00, 9589.72 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 6233.41 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9401.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.82it/s, loss=0.462]


Epoch 1 Loss: 36.9344
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.56it/s, loss=0.308]


Epoch 2 Loss: 17.5377
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.73it/s, loss=0.463]


Epoch 3 Loss: 12.4521
Test Metrics: Precision=0.9131, Recall=0.9131, F1=0.9131

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 5...


Map: 100%|██████████| 440/440 [00:00<00:00, 8982.47 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7186.50 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9692.99 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.08it/s, loss=0.395]


Epoch 1 Loss: 36.7683
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:05<00:00, 10.88it/s, loss=0.324]


Epoch 2 Loss: 18.7001
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:05<00:00, 10.78it/s, loss=0.151]


Epoch 3 Loss: 13.8557
Test Metrics: Precision=0.9093, Recall=0.9093, F1=0.9093

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 6...


Map: 100%|██████████| 440/440 [00:00<00:00, 8549.41 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 8255.40 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9134.41 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:05<00:00, 10.41it/s, loss=0.603]


Epoch 1 Loss: 35.3457
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:05<00:00, 10.52it/s, loss=0.186]


Epoch 2 Loss: 18.2734
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:05<00:00, 10.67it/s, loss=0.214]


Epoch 3 Loss: 12.9934
Test Metrics: Precision=0.9123, Recall=0.9123, F1=0.9123

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 7...


Map: 100%|██████████| 440/440 [00:00<00:00, 8941.48 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 6956.12 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9583.51 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.05it/s, loss=0.259]


Epoch 1 Loss: 36.5740
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.04it/s, loss=0.511]


Epoch 2 Loss: 17.2987
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.26it/s, loss=0.225]


Epoch 3 Loss: 12.3934
Test Metrics: Precision=0.9125, Recall=0.9125, F1=0.9125

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 8...


Map: 100%|██████████| 440/440 [00:00<00:00, 9496.80 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7687.00 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9261.05 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.66it/s, loss=0.453]


Epoch 1 Loss: 35.7639
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.72it/s, loss=0.312] 


Epoch 2 Loss: 17.8363
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.68it/s, loss=0.341] 


Epoch 3 Loss: 13.2028
Test Metrics: Precision=0.9134, Recall=0.9134, F1=0.9134

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 9...


Map: 100%|██████████| 440/440 [00:00<00:00, 9109.37 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7765.60 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9108.76 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.33it/s, loss=0.398]


Epoch 1 Loss: 33.1433
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.76it/s, loss=0.232]


Epoch 2 Loss: 16.6380
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.29it/s, loss=0.164]


Epoch 3 Loss: 11.9859
Test Metrics: Precision=0.9102, Recall=0.9102, F1=0.9102

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 440, Split 10...


Map: 100%|██████████| 440/440 [00:00<00:00, 9026.27 examples/s]
Map: 100%|██████████| 88/88 [00:00<00:00, 7433.71 examples/s]
Map: 100%|██████████| 720/720 [00:00<00:00, 9337.07 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 55/55 [00:04<00:00, 11.08it/s, loss=0.567]


Epoch 1 Loss: 34.3061
Epoch 2/3


Training Epoch 2: 100%|██████████| 55/55 [00:04<00:00, 11.32it/s, loss=0.308]


Epoch 2 Loss: 16.2074
Epoch 3/3


Training Epoch 3: 100%|██████████| 55/55 [00:04<00:00, 11.19it/s, loss=0.222]


Epoch 3 Loss: 11.6773
Test Metrics: Precision=0.9141, Recall=0.9141, F1=0.9141

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 1...


Map: 100%|██████████| 460/460 [00:00<00:00, 8988.03 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8275.63 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9447.18 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:04<00:00, 11.71it/s, loss=0.363]


Epoch 1 Loss: 35.3197
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:04<00:00, 11.76it/s, loss=0.203]


Epoch 2 Loss: 17.8259
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 11.48it/s, loss=0.126] 


Epoch 3 Loss: 12.5784
Test Metrics: Precision=0.9128, Recall=0.9128, F1=0.9128

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 2...


Map: 100%|██████████| 460/460 [00:00<00:00, 9158.26 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7853.86 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9264.59 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:04<00:00, 11.61it/s, loss=0.253]


Epoch 1 Loss: 35.5875
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:04<00:00, 11.66it/s, loss=0.289]


Epoch 2 Loss: 17.8862
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:04<00:00, 11.72it/s, loss=0.426] 


Epoch 3 Loss: 13.3062
Test Metrics: Precision=0.9082, Recall=0.9082, F1=0.9082

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 3...


Map: 100%|██████████| 460/460 [00:00<00:00, 8348.39 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 8092.19 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9469.49 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 10.69it/s, loss=0.533]


Epoch 1 Loss: 37.2174
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 10.97it/s, loss=0.217]


Epoch 2 Loss: 17.8792
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 10.70it/s, loss=0.238] 


Epoch 3 Loss: 13.3596
Test Metrics: Precision=0.9136, Recall=0.9136, F1=0.9136

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 4...


Map: 100%|██████████| 460/460 [00:00<00:00, 9572.19 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7003.07 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9426.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 11.42it/s, loss=0.526]


Epoch 1 Loss: 34.7809
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:04<00:00, 11.67it/s, loss=0.241]


Epoch 2 Loss: 17.3401
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:04<00:00, 11.94it/s, loss=0.0819]


Epoch 3 Loss: 12.9090
Test Metrics: Precision=0.9118, Recall=0.9118, F1=0.9118

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 5...


Map: 100%|██████████| 460/460 [00:00<00:00, 9174.42 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 9209.45 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9363.85 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 10.90it/s, loss=0.245]


Epoch 1 Loss: 38.3160
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 10.93it/s, loss=0.367]


Epoch 2 Loss: 19.7516
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 11.10it/s, loss=0.245]


Epoch 3 Loss: 14.3280
Test Metrics: Precision=0.9056, Recall=0.9056, F1=0.9056

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 6...


Map: 100%|██████████| 460/460 [00:00<00:00, 8546.61 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7224.38 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9389.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 10.56it/s, loss=0.418]


Epoch 1 Loss: 35.2592
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 10.63it/s, loss=0.168]


Epoch 2 Loss: 17.4881
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 10.51it/s, loss=0.165]


Epoch 3 Loss: 13.1598
Test Metrics: Precision=0.9135, Recall=0.9135, F1=0.9135

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 7...


Map: 100%|██████████| 460/460 [00:00<00:00, 8854.31 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6673.51 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9566.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 11.48it/s, loss=0.337]


Epoch 1 Loss: 37.6320
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 11.29it/s, loss=0.253]


Epoch 2 Loss: 17.2011
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 11.56it/s, loss=0.229]


Epoch 3 Loss: 12.5082
Test Metrics: Precision=0.9127, Recall=0.9127, F1=0.9127

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 8...


Map: 100%|██████████| 460/460 [00:00<00:00, 8359.75 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 6608.14 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9072.71 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 11.56it/s, loss=0.297]


Epoch 1 Loss: 34.2697
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 10.88it/s, loss=0.341]


Epoch 2 Loss: 18.2624
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 11.24it/s, loss=0.142]


Epoch 3 Loss: 13.6409
Test Metrics: Precision=0.9200, Recall=0.9200, F1=0.9200

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 9...


Map: 100%|██████████| 460/460 [00:00<00:00, 9081.06 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7066.16 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9063.16 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:04<00:00, 11.68it/s, loss=0.385]


Epoch 1 Loss: 35.6640
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 11.33it/s, loss=0.121]


Epoch 2 Loss: 18.0883
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:04<00:00, 11.67it/s, loss=0.13]  


Epoch 3 Loss: 12.4874
Test Metrics: Precision=0.9106, Recall=0.9106, F1=0.9106

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 460, Split 10...


Map: 100%|██████████| 460/460 [00:00<00:00, 9039.53 examples/s]
Map: 100%|██████████| 92/92 [00:00<00:00, 7026.15 examples/s]
Map: 100%|██████████| 696/696 [00:00<00:00, 9702.00 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 58/58 [00:05<00:00, 11.40it/s, loss=0.563]


Epoch 1 Loss: 35.5819
Epoch 2/3


Training Epoch 2: 100%|██████████| 58/58 [00:05<00:00, 11.06it/s, loss=0.287]


Epoch 2 Loss: 17.0547
Epoch 3/3


Training Epoch 3: 100%|██████████| 58/58 [00:05<00:00, 11.31it/s, loss=0.284] 


Epoch 3 Loss: 12.1452
Test Metrics: Precision=0.9147, Recall=0.9147, F1=0.9147

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 1...


Map: 100%|██████████| 480/480 [00:00<00:00, 8219.02 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8474.77 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9265.74 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.60it/s, loss=0.253]


Epoch 1 Loss: 36.2707
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.49it/s, loss=0.279]


Epoch 2 Loss: 16.9667
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.78it/s, loss=0.206] 


Epoch 3 Loss: 12.3834
Test Metrics: Precision=0.9118, Recall=0.9118, F1=0.9118

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 2...


Map: 100%|██████████| 480/480 [00:00<00:00, 9015.73 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7786.91 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9284.81 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.24it/s, loss=0.46] 


Epoch 1 Loss: 36.4956
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.38it/s, loss=0.21] 


Epoch 2 Loss: 19.2253
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.58it/s, loss=0.194]


Epoch 3 Loss: 13.9395
Test Metrics: Precision=0.9108, Recall=0.9108, F1=0.9108

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 3...


Map: 100%|██████████| 480/480 [00:00<00:00, 8627.07 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8202.85 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9550.73 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 10.54it/s, loss=0.289]


Epoch 1 Loss: 38.6160
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 10.66it/s, loss=0.249]


Epoch 2 Loss: 18.8954
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 10.82it/s, loss=0.364]


Epoch 3 Loss: 13.7315
Test Metrics: Precision=0.9133, Recall=0.9133, F1=0.9133

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 4...


Map: 100%|██████████| 480/480 [00:00<00:00, 9221.04 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8806.74 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9006.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.31it/s, loss=0.345]


Epoch 1 Loss: 37.8740
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.45it/s, loss=0.212]


Epoch 2 Loss: 18.5763
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.32it/s, loss=0.224] 


Epoch 3 Loss: 13.5243
Test Metrics: Precision=0.9125, Recall=0.9125, F1=0.9125

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 5...


Map: 100%|██████████| 480/480 [00:00<00:00, 8910.46 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 9032.35 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9400.69 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.35it/s, loss=0.276]


Epoch 1 Loss: 39.0731
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 10.98it/s, loss=0.209]


Epoch 2 Loss: 18.6223
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 10.85it/s, loss=0.345]


Epoch 3 Loss: 13.4687
Test Metrics: Precision=0.9099, Recall=0.9099, F1=0.9099

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 6...


Map: 100%|██████████| 480/480 [00:00<00:00, 8583.34 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8007.90 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9523.30 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 10.55it/s, loss=0.493]


Epoch 1 Loss: 39.3884
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 10.12it/s, loss=0.358]


Epoch 2 Loss: 19.3401
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 10.61it/s, loss=0.126]


Epoch 3 Loss: 14.2394
Test Metrics: Precision=0.9148, Recall=0.9148, F1=0.9148

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 7...


Map: 100%|██████████| 480/480 [00:00<00:00, 9073.83 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 6107.84 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9620.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.43it/s, loss=0.45] 


Epoch 1 Loss: 37.8312
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.24it/s, loss=0.282]


Epoch 2 Loss: 17.2608
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.18it/s, loss=0.131]


Epoch 3 Loss: 12.7373
Test Metrics: Precision=0.9140, Recall=0.9140, F1=0.9140

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 8...


Map: 100%|██████████| 480/480 [00:00<00:00, 9200.47 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8158.97 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9366.64 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.31it/s, loss=0.313]


Epoch 1 Loss: 38.5032
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.45it/s, loss=0.261]


Epoch 2 Loss: 20.0905
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.21it/s, loss=0.364] 


Epoch 3 Loss: 14.9455
Test Metrics: Precision=0.9097, Recall=0.9097, F1=0.9097

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 9...


Map: 100%|██████████| 480/480 [00:00<00:00, 2044.24 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 7160.06 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9156.44 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.75it/s, loss=0.442]


Epoch 1 Loss: 35.5679
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.13it/s, loss=0.235]


Epoch 2 Loss: 17.2080
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.49it/s, loss=0.201] 


Epoch 3 Loss: 12.2581
Test Metrics: Precision=0.9138, Recall=0.9138, F1=0.9138

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 480, Split 10...


Map: 100%|██████████| 480/480 [00:00<00:00, 8968.34 examples/s]
Map: 100%|██████████| 96/96 [00:00<00:00, 8688.17 examples/s]
Map: 100%|██████████| 672/672 [00:00<00:00, 9091.52 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 60/60 [00:05<00:00, 11.35it/s, loss=0.323]


Epoch 1 Loss: 36.3782
Epoch 2/3


Training Epoch 2: 100%|██████████| 60/60 [00:05<00:00, 11.25it/s, loss=0.191]


Epoch 2 Loss: 17.5654
Epoch 3/3


Training Epoch 3: 100%|██████████| 60/60 [00:05<00:00, 11.32it/s, loss=0.149]


Epoch 3 Loss: 12.7418
Test Metrics: Precision=0.9143, Recall=0.9143, F1=0.9143

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 1...


Map: 100%|██████████| 500/500 [00:00<00:00, 8811.19 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8445.19 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9107.39 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.46it/s, loss=0.126]


Epoch 1 Loss: 38.2209
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.41it/s, loss=0.161]


Epoch 2 Loss: 18.3235
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.79it/s, loss=0.275]


Epoch 3 Loss: 13.7182
Test Metrics: Precision=0.9153, Recall=0.9153, F1=0.9153

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 2...


Map: 100%|██████████| 500/500 [00:00<00:00, 9168.52 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8075.75 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8915.50 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.48it/s, loss=0.383]


Epoch 1 Loss: 36.7570
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.66it/s, loss=0.218]


Epoch 2 Loss: 18.2453
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.60it/s, loss=0.21]  


Epoch 3 Loss: 12.9862
Test Metrics: Precision=0.9155, Recall=0.9155, F1=0.9155

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 3...


Map: 100%|██████████| 500/500 [00:00<00:00, 8956.83 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7593.84 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9776.40 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.19it/s, loss=0.287]


Epoch 1 Loss: 41.0286
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 10.67it/s, loss=0.264]


Epoch 2 Loss: 20.1451
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 10.95it/s, loss=0.24] 


Epoch 3 Loss: 14.2358
Test Metrics: Precision=0.9142, Recall=0.9142, F1=0.9142

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 4...


Map: 100%|██████████| 500/500 [00:00<00:00, 9066.77 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7556.08 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9389.13 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.64it/s, loss=0.385]


Epoch 1 Loss: 38.7691
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.49it/s, loss=0.275]


Epoch 2 Loss: 19.0492
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.67it/s, loss=0.349]


Epoch 3 Loss: 14.0789
Test Metrics: Precision=0.9101, Recall=0.9101, F1=0.9101

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 5...


Map: 100%|██████████| 500/500 [00:00<00:00, 8790.14 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7557.58 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9519.12 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.26it/s, loss=0.401]


Epoch 1 Loss: 38.1198
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.08it/s, loss=0.237]


Epoch 2 Loss: 19.0060
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.14it/s, loss=0.11]  


Epoch 3 Loss: 13.8003
Test Metrics: Precision=0.9120, Recall=0.9120, F1=0.9120

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 6...


Map: 100%|██████████| 500/500 [00:00<00:00, 8518.39 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7977.75 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9573.95 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 10.80it/s, loss=0.317]


Epoch 1 Loss: 40.5493
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.06it/s, loss=0.154]


Epoch 2 Loss: 18.4191
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 10.60it/s, loss=0.109]


Epoch 3 Loss: 13.8455
Test Metrics: Precision=0.9167, Recall=0.9167, F1=0.9167

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 7...


Map: 100%|██████████| 500/500 [00:00<00:00, 8802.10 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 9065.04 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9297.72 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.42it/s, loss=1.02] 


Epoch 1 Loss: 39.0310
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.33it/s, loss=0.247]


Epoch 2 Loss: 18.1271
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.49it/s, loss=0.144] 


Epoch 3 Loss: 13.3471
Test Metrics: Precision=0.9113, Recall=0.9113, F1=0.9113

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 8...


Map: 100%|██████████| 500/500 [00:00<00:00, 9192.35 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 8628.12 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9281.87 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.30it/s, loss=0.429]


Epoch 1 Loss: 41.0232
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.36it/s, loss=0.229]


Epoch 2 Loss: 20.1548
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.23it/s, loss=0.156] 


Epoch 3 Loss: 15.0812
Test Metrics: Precision=0.9151, Recall=0.9151, F1=0.9151

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 9...


Map: 100%|██████████| 500/500 [00:00<00:00, 8834.58 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7786.41 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 8833.10 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.48it/s, loss=0.148]


Epoch 1 Loss: 38.3111
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.38it/s, loss=0.12] 


Epoch 2 Loss: 19.7428
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.47it/s, loss=0.138]


Epoch 3 Loss: 14.1154
Test Metrics: Precision=0.9088, Recall=0.9088, F1=0.9088

Fine-tuning dmis-lab/biobert-v1.1 (state) with Train Size 500, Split 10...


Map: 100%|██████████| 500/500 [00:00<00:00, 9106.06 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 7003.35 examples/s]
Map: 100%|██████████| 648/648 [00:00<00:00, 9559.97 examples/s]
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


Training Epoch 1: 100%|██████████| 63/63 [00:05<00:00, 11.33it/s, loss=0.338]


Epoch 1 Loss: 37.3259
Epoch 2/3


Training Epoch 2: 100%|██████████| 63/63 [00:05<00:00, 11.58it/s, loss=0.288]


Epoch 2 Loss: 17.7902
Epoch 3/3


Training Epoch 3: 100%|██████████| 63/63 [00:05<00:00, 11.31it/s, loss=0.101]


Epoch 3 Loss: 12.9685
Test Metrics: Precision=0.9109, Recall=0.9109, F1=0.9109
Results saved to Experiments_moreksplits10_lesssteps20_for_smoother_graphh_biobert.xlsx
