In [20]:
# !pip install evaluate
# !pip install seqeval

In [22]:
# !pip install accelerate -U
# !pip install transformers[torch]

In [2]:
import sys
sys.path.append("..")
from helper import align_labels
import helper
import collections

## Dataset

### Read the Train Dataset (Sentences and Labels)

In [3]:
# Open the Data of Sentences
with open('subtask2_train.data.txt', 'r', encoding='utf-8') as file:
    sentences = file.readlines()


# Open the label data
with open('subtask2_train.labels.txt', 'r', encoding='utf-8') as file:
    labels = file.readlines()

### Convert the Dataset into a pandas dataframe

In [4]:
import pandas as pd

dataset = pd.DataFrame(list(zip(sentences, labels)), columns=['sentences', 'labels'])
# split sentences and labels
dataset['sentences'] = dataset['sentences'].apply(lambda row: row.split())
dataset['labels'] = dataset['labels'].apply(lambda row: row.split())

### Unique labels

In [5]:
unique_labels = []

for line in labels:
    labels_list = line.split()
    for label in labels_list:
        if label not in unique_labels:
            unique_labels.append(label)

uniqueLabel_to_ID = {unique_label: ID for ID, unique_label in enumerate(unique_labels)}
ID_to_uniqueLable = {ID: unique_label for ID, unique_label in enumerate(unique_labels)}

### Map each label to its ID

In [6]:
dataset['IDs'] = dataset['labels'].apply(lambda row: [uniqueLabel_to_ID.get(label) for label in row])

# Prepare the train and validation dataset

In [7]:
from sklearn.model_selection import train_test_split

train, validation = train_test_split(dataset, test_size=0.25, random_state=42)

In [8]:
len(train), len(validation)

(1764, 589)

## Tokenize and convet the labels from tokenized into IDs

In [9]:
from transformers import BertTokenizerFast, AutoTokenizer

tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [10]:
def tokenization(input_data):
    tokenized_train_inputs = tokenizer(
        input_data,
        return_tensors="pt",
        truncation=True,
        is_split_into_words=True,
        padding="do_not_pad",
        max_length=512,
    )

    return tokenized_train_inputs

### Trian Tokenization

In [11]:
train['tokenized'] = train.apply(lambda row: helper.tokenized_align_labels(tokenization(row['sentences']), row['IDs']), axis=1)

### Validation Tokenization

In [12]:
validation['tokenized'] = validation.apply(lambda row: helper.tokenized_align_labels(tokenization(row['sentences']), row['IDs']), axis=1)

### Prepare Train dataset for fine-tuning

In [13]:
Train_dataset = train.tokenized.apply(
    lambda x: {
        k: v[0]
        if type(v) is not list
        else torch.tensor(v)
        for k, v in x.items()}).to_list()

### Prepare validation dataset for fine-tuning

In [14]:
Val_dataset = validation.tokenized.apply(
    lambda x: {
        k: v[0]
        if type(v) is not list
        else torch.tensor(v)
        for k, v in x.items()}).to_list()

## Load pretrained model

In [15]:
from transformers import AutoModelForTokenClassification
### todo guck mal nach cased und uncased ob es mit cased besser funktioniert oder mit uncased
model_name = "bert-base-uncased"
model = AutoModelForTokenClassification.from_pretrained(model_name, id2label=ID_to_uniqueLable, num_labels=len(unique_labels))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Evaluation

In [44]:
import evaluate
import numpy as np
metric = evaluate.load('seqeval')


def compute_metrics(eval_preds, label_class=unique_labels):
    logits, labels = eval_preds
    # becase the logics and probabilities both are in the same order, we don't need to aply softmax here
    predictions = np.argmax(logits, axis=-1)
    # now we need to remove all the values, where the label is -100
    # before passing to metric.compute we should have these inputs as a list
    true_labels = [[label_class[l] for l in label if l != -100]
                   for label in labels]

    true_predictions = [[label_class[p] for p,l in zip(prediction, label) if l != -100]
                        for prediction, label in zip(predictions, labels)]

    all_metrics = metric.compute(predictions=true_predictions, references=true_labels, zero_division=0)
    support_all = 0
    f1_weighted = {}
    label_values = {}
    entity_labels = list({l[2:] for l in label_class if l != 'O'})
    for label in entity_labels:
      label_metrics = all_metrics.get(label, dict())
      for metric_label, value in label_metrics.items():
        if metric_label == 'number':
          support_all += value
        if metric_label == 'f1':
          f1_weighted[label] = value * label_metrics['number']
        label_values[f"{metric_label}_{label}"] = value
      if label_metrics:
        del all_metrics[label]
    f1_macro_weighted = 0.
    if support_all != 0:
      for f1 in f1_weighted.values():
        f1_macro_weighted += f1 / support_all
    all_metrics[f"f1_macro_weighted"] = f1_macro_weighted
    all_metrics |= label_values
    return all_metrics

# Parameter optimization

## Define, perform the hyperparameter search

In [42]:
# !pip install transformers optuna datasets

In [None]:
import optuna
from transformers import Trainer, TrainingArguments, BertForTokenClassification
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer)
def model_init():
    return BertForTokenClassification.from_pretrained(model_name, num_labels=len(unique_labels), return_dict=True)



def objective(trial):
    learning_rate = trial.suggest_float('learning_rate',  1e-5, 5e-5, log=True)
    batch_size = trial.suggest_categorical('batch_size', [4, 8, 16, 32])
    epochs = trial.suggest_int("num_train_epochs", 3, 7)
    weight_decay = trial.suggest_float("weight_decay",  1e-5, 0.1, log=True)

    training_args = TrainingArguments(
        output_dir="./output",
        eval_strategy="epoch",
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=epochs,
        weight_decay=weight_decay,
        logging_dir='./logs',
        logging_steps=100,
        warmup_ratio=0.1,
        gradient_accumulation_steps=2,
        max_grad_norm=1.0
    )



    trainer = Trainer(
        model_init=model_init,
        args=training_args,
        train_dataset=Train_dataset,
        eval_dataset=Val_dataset,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    eval_result = trainer.evaluate()
    overall_f1 = eval_result['eval_f1_macro_weighted']
    return overall_f1

study = optuna.create_study(direction='maximize')
assert len(study.trials) == 0
study.optimize(objective, n_trials=20)

if len(study.trials) == 0 or all([t.state != optuna.trial.TrialState.COMPLETE for t in study.trials]):
    print("No trials are completed yet.")
else:
  print("Best trial:")
  trial = study.best_trial
  print(trial.values)
  print("Best hyperparameters: {}".format(trial.params))


[I 2024-07-16 10:49:33,407] A new study created in memory with name: no-name-260f22a8-439e-47c8-8d9f-2925842f12bb
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,No log,0.142776,0.745736,0.677465,0.709963,0.969494,0.645675,0.884488,0.940351,0.911565,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.696629,0.953846,0.805195,130,0.0,0.0,0.0,9,0.162791,0.090323,0.116183,155,0.0,0.0,0.0,16,0.961538,0.9375,0.949367,80,0.0,0.0,0.0,15
2,0.533500,0.072795,0.791557,0.84507,0.817439,0.981672,0.788755,0.873016,0.964912,0.916667,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.767123,0.861538,0.811594,130,0.0,0.0,0.0,9,0.649038,0.870968,0.743802,155,0.666667,0.125,0.210526,16,0.915663,0.95,0.932515,80,0.0,0.0,0.0,15


[I 2024-07-16 10:51:14,671] Trial 0 finished with value: 0.7887551417206412 and parameters: {'learning_rate': 2.4612471273961025e-05, 'batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 2.61958694318397e-05}. Best is trial 0 with value: 0.7887551417206412.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,1.1188,0.140258,0.739264,0.678873,0.707783,0.970724,0.645753,0.887417,0.940351,0.913118,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.696629,0.953846,0.805195,130,0.0,0.0,0.0,9,0.152174,0.090323,0.11336,155,0.0,0.0,0.0,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15
2,0.0711,0.063725,0.827397,0.850704,0.838889,0.983701,0.816993,0.895425,0.961404,0.927242,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.80597,0.830769,0.818182,130,0.5,0.333333,0.4,9,0.710526,0.870968,0.782609,155,0.571429,0.5,0.533333,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15
4,0.0465,0.057103,0.797665,0.866197,0.83052,0.985731,0.818275,0.901316,0.961404,0.93039,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.793103,0.884615,0.836364,130,0.333333,0.333333,0.333333,9,0.700508,0.890323,0.784091,155,0.36,0.5625,0.439024,16,0.938272,0.95,0.944099,80,0.0,0.0,0.0,15


[I 2024-07-16 10:54:10,085] Trial 1 finished with value: 0.8182746401597264 and parameters: {'learning_rate': 1.109728005766158e-05, 'batch_size': 8, 'num_train_epochs': 5, 'weight_decay': 0.00039908643992766346}. Best is trial 1 with value: 0.8182746401597264.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
1,No log,0.390714,0.0,0.0,0.0,0.937696,0.0,0.0,0.0,0.0,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.0,0.0,0.0,130,0.0,0.0,0.0,9,0.0,0.0,0.0,155,0.0,0.0,0.0,16,0.0,0.0,0.0,80,0.0,0.0,0.0,15
2,No log,0.209578,0.912773,0.412676,0.56838,0.95504,0.456212,0.937255,0.838596,0.885185,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.818182,0.415385,0.55102,130,0.0,0.0,0.0,9,0.0,0.0,0.0,155,0.0,0.0,0.0,16,0.0,0.0,0.0,80,0.0,0.0,0.0,15
3,No log,0.126078,0.7429,0.7,0.720812,0.973,0.680293,0.873377,0.94386,0.907251,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.713333,0.823077,0.764286,130,0.0,0.0,0.0,9,0.340909,0.290323,0.313589,155,0.0,0.0,0.0,16,0.962025,0.95,0.955975,80,0.0,0.0,0.0,15
4,0.675000,0.099226,0.774366,0.816901,0.795065,0.979581,0.762945,0.866667,0.957895,0.91,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.732026,0.861538,0.791519,130,0.0,0.0,0.0,9,0.595,0.767742,0.670423,155,0.0,0.0,0.0,16,0.938272,0.95,0.944099,80,0.0,0.0,0.0,15
5,0.675000,0.091161,0.802469,0.823944,0.813065,0.980442,0.779208,0.869427,0.957895,0.911519,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.780142,0.846154,0.811808,130,0.0,0.0,0.0,9,0.649485,0.812903,0.722063,155,0.0,0.0,0.0,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15
6,0.675000,0.089044,0.78496,0.838028,0.810627,0.980134,0.777881,0.869841,0.961404,0.913333,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.745098,0.876923,0.805654,130,0.0,0.0,0.0,9,0.62381,0.845161,0.717808,155,0.0,0.0,0.0,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15


[I 2024-07-16 10:57:48,767] Trial 2 finished with value: 0.7778806426827581 and parameters: {'learning_rate': 1.3482448887298277e-05, 'batch_size': 32, 'num_train_epochs': 6, 'weight_decay': 0.011371726970768803}. Best is trial 1 with value: 0.8182746401597264.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,1.0621,0.122591,0.739803,0.740845,0.740324,0.974045,0.697291,0.861199,0.957895,0.906977,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.696629,0.953846,0.805195,130,0.0,0.0,0.0,9,0.395522,0.341935,0.366782,155,0.0,0.0,0.0,16,0.926829,0.95,0.938272,80,0.0,0.0,0.0,15
2,0.0655,0.060117,0.820513,0.856338,0.838043,0.98407,0.820172,0.898361,0.961404,0.928814,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.801471,0.838462,0.819549,130,0.428571,0.333333,0.375,9,0.717277,0.883871,0.791908,155,0.5,0.5625,0.529412,16,0.962025,0.95,0.955975,80,0.0,0.0,0.0,15
4,0.0411,0.053952,0.807592,0.869014,0.837178,0.986162,0.827186,0.907285,0.961404,0.93356,285,0.111111,0.1,0.105263,10,0.0,0.0,0.0,10,0.808511,0.876923,0.841328,130,0.333333,0.333333,0.333333,9,0.720207,0.896774,0.798851,155,0.333333,0.5625,0.418605,16,0.938272,0.95,0.944099,80,0.5,0.066667,0.117647,15


[I 2024-07-16 11:00:46,356] Trial 3 finished with value: 0.8271862407652768 and parameters: {'learning_rate': 1.2618058581844125e-05, 'batch_size': 8, 'num_train_epochs': 5, 'weight_decay': 4.038753090994087e-05}. Best is trial 3 with value: 0.8271862407652768.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,0.1737,0.097652,0.814346,0.815493,0.814919,0.980257,0.779494,0.90301,0.947368,0.924658,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.681319,0.953846,0.794872,130,0.0,0.0,0.0,9,0.736486,0.703226,0.719472,155,0.0,0.0,0.0,16,0.926829,0.95,0.938272,80,0.0,0.0,0.0,15
2,0.0422,0.051235,0.827048,0.895775,0.860041,0.987453,0.859493,0.901639,0.964912,0.932203,285,0.285714,0.2,0.235294,10,1.0,0.5,0.666667,10,0.820144,0.876923,0.847584,130,0.5,0.444444,0.470588,9,0.765957,0.929032,0.83965,155,0.333333,0.5625,0.418605,16,0.938272,0.95,0.944099,80,0.777778,0.466667,0.583333,15
4,0.0187,0.049144,0.8738,0.897183,0.885337,0.989175,0.885809,0.923333,0.97193,0.947009,285,0.625,0.5,0.555556,10,1.0,0.7,0.823529,10,0.892562,0.830769,0.860558,130,0.555556,0.555556,0.555556,9,0.827381,0.896774,0.860681,155,0.409091,0.5625,0.473684,16,0.95,0.95,0.95,80,0.785714,0.733333,0.758621,15
5,0.0176,0.048906,0.86285,0.912676,0.887064,0.98936,0.889619,0.923077,0.968421,0.945205,285,0.75,0.6,0.666667,10,1.0,0.8,0.888889,10,0.846715,0.892308,0.868914,130,0.5,0.555556,0.526316,9,0.839286,0.909677,0.873065,155,0.36,0.5625,0.439024,16,0.95,0.95,0.95,80,0.6875,0.733333,0.709677,15


[I 2024-07-16 11:04:59,888] Trial 4 finished with value: 0.8896188587836443 and parameters: {'learning_rate': 1.1919459316702425e-05, 'batch_size': 4, 'num_train_epochs': 6, 'weight_decay': 0.02374607192919603}. Best is trial 4 with value: 0.8896188587836443.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
1,No log,0.337775,0.0,0.0,0.0,0.937696,0.0,0.0,0.0,0.0,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.0,0.0,0.0,130,0.0,0.0,0.0,9,0.0,0.0,0.0,155,0.0,0.0,0.0,16,0.0,0.0,0.0,80,0.0,0.0,0.0,15
2,No log,0.163509,0.763293,0.626761,0.688322,0.96605,0.61355,0.894915,0.926316,0.910345,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.7125,0.876923,0.786207,130,0.0,0.0,0.0,9,0.04918,0.019355,0.027778,155,0.0,0.0,0.0,16,0.955224,0.8,0.870748,80,0.0,0.0,0.0,15
3,No log,0.110982,0.723347,0.75493,0.738801,0.976567,0.713663,0.871383,0.950877,0.909396,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.731544,0.838462,0.781362,130,0.0,0.0,0.0,9,0.39604,0.516129,0.448179,155,0.0,0.0,0.0,16,0.962025,0.95,0.955975,80,0.0,0.0,0.0,15
4,0.559300,0.092935,0.729084,0.773239,0.750513,0.977366,0.724876,0.872611,0.961404,0.914858,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.741722,0.861538,0.797153,130,0.0,0.0,0.0,9,0.418269,0.56129,0.479339,155,0.0,0.0,0.0,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15
5,0.559300,0.089093,0.763329,0.826761,0.79378,0.979765,0.763066,0.859375,0.964912,0.909091,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.741722,0.861538,0.797153,130,0.0,0.0,0.0,9,0.568807,0.8,0.664879,155,0.0,0.0,0.0,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15


[I 2024-07-16 11:08:02,211] Trial 5 finished with value: 0.7630663416865349 and parameters: {'learning_rate': 1.8953292783183534e-05, 'batch_size': 32, 'num_train_epochs': 5, 'weight_decay': 0.027940706621759664}. Best is trial 4 with value: 0.8896188587836443.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,0.9831,0.109357,0.791726,0.78169,0.786676,0.977059,0.747937,0.888525,0.950877,0.918644,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.696629,0.953846,0.805195,130,0.0,0.0,0.0,9,0.617647,0.541935,0.57732,155,0.0,0.0,0.0,16,0.926829,0.95,0.938272,80,0.0,0.0,0.0,15
2,0.0594,0.055807,0.826087,0.856338,0.840941,0.986039,0.825925,0.91,0.957895,0.933333,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.816176,0.853846,0.834586,130,0.375,0.333333,0.352941,9,0.743169,0.877419,0.804734,155,0.45,0.5625,0.5,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15
4,0.0348,0.051326,0.829396,0.890141,0.858696,0.98733,0.857649,0.916388,0.961404,0.938356,285,0.375,0.3,0.333333,10,1.0,0.2,0.333333,10,0.809859,0.884615,0.845588,130,0.444444,0.444444,0.444444,9,0.770492,0.909677,0.83432,155,0.333333,0.5625,0.418605,16,0.95,0.95,0.95,80,0.666667,0.533333,0.592593,15


[I 2024-07-16 11:11:03,201] Trial 6 finished with value: 0.8576489211815906 and parameters: {'learning_rate': 1.5117756753273184e-05, 'batch_size': 8, 'num_train_epochs': 5, 'weight_decay': 1.651580629308851e-05}. Best is trial 4 with value: 0.8896188587836443.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,No log,0.133467,0.747692,0.684507,0.714706,0.970847,0.660264,0.90378,0.922807,0.913194,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.696629,0.953846,0.805195,130,0.0,0.0,0.0,9,0.227723,0.148387,0.179687,155,0.0,0.0,0.0,16,0.95,0.95,0.95,80,0.0,0.0,0.0,15
2,0.581600,0.053116,0.840108,0.873239,0.856354,0.985792,0.853232,0.915825,0.954386,0.934708,285,0.0,0.0,0.0,10,1.0,0.3,0.461538,10,0.835938,0.823077,0.829457,130,0.5,0.444444,0.470588,9,0.787709,0.909677,0.844311,155,0.409091,0.5625,0.473684,16,0.95,0.95,0.95,80,0.571429,0.533333,0.551724,15
4,0.042600,0.050768,0.852785,0.905634,0.878415,0.988191,0.878902,0.925926,0.964912,0.945017,285,0.714286,0.5,0.588235,10,1.0,0.7,0.823529,10,0.824818,0.869231,0.846442,130,0.444444,0.444444,0.444444,9,0.786517,0.903226,0.840841,155,0.631579,0.75,0.685714,16,0.938272,0.95,0.944099,80,0.578947,0.733333,0.647059,15
5,0.021200,0.050987,0.853403,0.91831,0.884668,0.988437,0.886399,0.923077,0.968421,0.945205,285,0.857143,0.6,0.705882,10,1.0,0.7,0.823529,10,0.825175,0.907692,0.864469,130,0.5,0.555556,0.526316,9,0.804598,0.903226,0.851064,155,0.545455,0.75,0.631579,16,0.938272,0.95,0.944099,80,0.571429,0.8,0.666667,15


[I 2024-07-16 11:14:21,029] Trial 7 finished with value: 0.8863994096656127 and parameters: {'learning_rate': 3.340689577220468e-05, 'batch_size': 16, 'num_train_epochs': 6, 'weight_decay': 0.050388269317350444}. Best is trial 4 with value: 0.8896188587836443.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy,F1 Macro Weighted,Precision Version,Recall Version,F1 Version,Number Version,Precision Alternativename,Recall Alternativename,F1 Alternativename,Number Alternativename,Precision Extension,Recall Extension,F1 Extension,Number Extension,Precision Citation,Recall Citation,F1 Citation,Number Citation,Precision License,Recall License,F1 License,Number License,Precision Developer,Recall Developer,F1 Developer,Number Developer,Precision Release,Recall Release,F1 Release,Number Release,Precision Url,Recall Url,F1 Url,Number Url,Precision Abbreviation,Recall Abbreviation,F1 Abbreviation,Number Abbreviation
0,No log,0.216122,0.945736,0.343662,0.504132,0.952642,0.403081,0.947137,0.754386,0.839844,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.935484,0.223077,0.360248,130,0.0,0.0,0.0,9,0.0,0.0,0.0,155,0.0,0.0,0.0,16,0.0,0.0,0.0,80,0.0,0.0,0.0,15
2,0.671000,0.093758,0.775839,0.814085,0.794502,0.979458,0.762225,0.869427,0.957895,0.911519,285,0.0,0.0,0.0,10,0.0,0.0,0.0,10,0.727273,0.861538,0.788732,130,0.0,0.0,0.0,9,0.596939,0.754839,0.666667,155,0.0,0.0,0.0,16,0.938272,0.95,0.944099,80,0.0,0.0,0.0,15


[I 2024-07-16 11:16:02,908] Trial 8 finished with value: 0.762224600747695 and parameters: {'learning_rate': 1.4293914088113572e-05, 'batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 0.0006987185712216486}. Best is trial 4 with value: 0.8896188587836443.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss


## Retrain with best parameters

In [19]:
V

NameError: name 'V' is not defined

In [22]:
trial.params, trial.value

({'learning_rate': 4.03137219824488e-05,
  'batch_size': 4,
  'num_train_epochs': 5,
  'weight_decay': 2.6675525585646302e-05},
 0.8789546079779919)