In [1]:
# Import required libraries
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForTokenClassification, DataCollatorForTokenClassification, TrainingArguments, Trainer, pipeline
from peft import get_peft_model, LoraConfig
import evaluate
import numpy as np
from huggingface_hub import notebook_login

In [2]:
raw_datasets = load_dataset("conll2003")
print(raw_datasets)

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})


In [3]:
# Look at the tokens of the first training example
raw_datasets["train"][0]["tokens"]

['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.']

In [4]:
# Look at the NER tags of the first training example
raw_datasets["train"][0]["ner_tags"]

[3, 0, 7, 0, 0, 0, 7, 0, 0]

In [5]:
# Get the label names for the NER tags
ner_feature = raw_datasets["train"].features["ner_tags"]
label_names = ner_feature.feature.names
label_names

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

In [6]:
words = raw_datasets["train"][0]["tokens"]
labels = raw_datasets["train"][0]["ner_tags"]
line1 = ""
line2 = ""
for word, label in zip(words, labels):
    full_label = label_names[label]
    max_length = max(len(word), len(full_label))
    line1 += word + " " * (max_length - len(word) + 1)
    line2 += full_label + " " * (max_length - len(full_label) + 1)

print(line1)
print(line2)

EU    rejects German call to boycott British lamb . 
B-ORG O       B-MISC O    O  O       B-MISC  O    O 


In [7]:
# Load the tokenizer
model_checkpoint = "bert-base-cased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)



In [8]:
# Tokenize the first training example
inputs = tokenizer(raw_datasets["train"][0]["tokens"], is_split_into_words=True)
inputs.tokens()

['[CLS]',
 'EU',
 'rejects',
 'German',
 'call',
 'to',
 'boycott',
 'British',
 'la',
 '##mb',
 '.',
 '[SEP]']

In [9]:
def align_labels_with_tokens(labels, word_ids):
    new_labels = []
    current_word = None
    for word_id in word_ids:
        if word_id != current_word:
            # Start of a new word!
            current_word = word_id
            label = -100 if word_id is None else labels[word_id]
            new_labels.append(label)
        elif word_id is None:
            # Special token
            new_labels.append(-100)
        else:
            # Same word as previous token
            label = labels[word_id]
            # If the label is B-XXX we change it to I-XXX
            if label % 2 == 1:
                label += 1
            new_labels.append(label)

    return new_labels

In [10]:
labels = raw_datasets["train"][0]["ner_tags"]
word_ids = inputs.word_ids()
print(labels)
print(align_labels_with_tokens(labels, word_ids))

[3, 0, 7, 0, 0, 0, 7, 0, 0]
[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, 0, -100]


In [11]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"], truncation=True, is_split_into_words=True
    )
    all_labels = examples["ner_tags"]
    new_labels = []
    for i, labels in enumerate(all_labels):
        word_ids = tokenized_inputs.word_ids(i)
        new_labels.append(align_labels_with_tokens(labels, word_ids))

    tokenized_inputs["labels"] = new_labels
    return tokenized_inputs

In [12]:
tokenized_datasets = raw_datasets.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=raw_datasets["train"].column_names,
)

In [13]:
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [14]:
for i in range(2):
    print(tokenized_datasets["train"][i]["labels"])

[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, 0, -100]
[-100, 1, 2, -100]


In [15]:
metric = evaluate.load("seqeval")

In [16]:
# Create label mappings
id2label = {i: label for i, label in enumerate(label_names)}
label2id = {v: k for k, v in id2label.items()}

In [17]:
# Load the pre-trained model
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
model.config.num_labels

9

In [19]:
model

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [20]:
# Configure LoRA (Low-Rank Adaptation) for fine-tuning
peft_config = LoraConfig(target_modules = ["query", "key"])

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 294,912 || all params: 108,021,513 || trainable%: 0.2730


In [21]:
def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }

In [22]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [23]:
args = TrainingArguments(
    "bert-finetuned-ner-lora",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=20,
    weight_decay=0.01,
    load_best_model_at_end=True,
    do_eval=True,
    do_predict=True,
    metric_for_best_model="accuracy",
    label_names=["labels"],
    push_to_hub=True,
)



In [24]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
trainer.train()

  0%|          | 0/35120 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'loss': 2.078, 'grad_norm': 0.1591925472021103, 'learning_rate': 1.9715261958997724e-05, 'epoch': 0.28}
{'loss': 1.7335, 'grad_norm': 0.35751551389694214, 'learning_rate': 1.9430523917995446e-05, 'epoch': 0.57}
{'loss': 1.23, 'grad_norm': 0.3602656126022339, 'learning_rate': 1.9145785876993168e-05, 'epoch': 0.85}


  0%|          | 0/407 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.9907475709915161, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.7612733266615648, 'eval_runtime': 5.9505, 'eval_samples_per_second': 546.17, 'eval_steps_per_second': 68.397, 'epoch': 1.0}
{'loss': 1.0322, 'grad_norm': 0.3555365800857544, 'learning_rate': 1.886104783599089e-05, 'epoch': 1.14}
{'loss': 0.9505, 'grad_norm': 0.41112151741981506, 'learning_rate': 1.8576309794988612e-05, 'epoch': 1.42}
{'loss': 0.8768, 'grad_norm': 0.5796128511428833, 'learning_rate': 1.8291571753986334e-05, 'epoch': 1.71}
{'loss': 0.8245, 'grad_norm': 0.974592387676239, 'learning_rate': 1.8006833712984056e-05, 'epoch': 1.99}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.7515557408332825, 'eval_precision': 0.03552158273381295, 'eval_recall': 0.013295186805789297, 'eval_f1': 0.01934851824638746, 'eval_accuracy': 0.7825984576440808, 'eval_runtime': 6.0164, 'eval_samples_per_second': 540.189, 'eval_steps_per_second': 67.648, 'epoch': 2.0}
{'loss': 0.7702, 'grad_norm': 0.42826271057128906, 'learning_rate': 1.7722095671981778e-05, 'epoch': 2.28}
{'loss': 0.7507, 'grad_norm': 0.37430691719055176, 'learning_rate': 1.74373576309795e-05, 'epoch': 2.56}
{'loss': 0.7328, 'grad_norm': 0.5020062327384949, 'learning_rate': 1.7152619589977222e-05, 'epoch': 2.85}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.6854819059371948, 'eval_precision': 0.2120204036371701, 'eval_recall': 0.16088858970043757, 'eval_f1': 0.18294900009568466, 'eval_accuracy': 0.8159033378465885, 'eval_runtime': 6.0089, 'eval_samples_per_second': 540.863, 'eval_steps_per_second': 67.733, 'epoch': 3.0}
{'loss': 0.7137, 'grad_norm': 0.45179906487464905, 'learning_rate': 1.6867881548974945e-05, 'epoch': 3.13}
{'loss': 0.7161, 'grad_norm': 0.46217212080955505, 'learning_rate': 1.6583143507972667e-05, 'epoch': 3.42}
{'loss': 0.6882, 'grad_norm': 0.31135469675064087, 'learning_rate': 1.629840546697039e-05, 'epoch': 3.7}
{'loss': 0.6738, 'grad_norm': 0.3984847366809845, 'learning_rate': 1.601366742596811e-05, 'epoch': 3.99}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.6545637249946594, 'eval_precision': 0.2585321100917431, 'eval_recall': 0.23712554695388757, 'eval_f1': 0.24736657303370782, 'eval_accuracy': 0.8250721139695061, 'eval_runtime': 6.19, 'eval_samples_per_second': 525.041, 'eval_steps_per_second': 65.751, 'epoch': 4.0}
{'loss': 0.6759, 'grad_norm': 0.2769240438938141, 'learning_rate': 1.5728929384965833e-05, 'epoch': 4.27}
{'loss': 0.6697, 'grad_norm': 0.24548429250717163, 'learning_rate': 1.5444191343963555e-05, 'epoch': 4.56}
{'loss': 0.6639, 'grad_norm': 0.382127046585083, 'learning_rate': 1.5159453302961277e-05, 'epoch': 4.84}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.633837878704071, 'eval_precision': 0.2685346327880265, 'eval_recall': 0.25967687647256815, 'eval_f1': 0.264031485284052, 'eval_accuracy': 0.828074409842821, 'eval_runtime': 6.2211, 'eval_samples_per_second': 522.417, 'eval_steps_per_second': 65.423, 'epoch': 5.0}
{'loss': 0.6516, 'grad_norm': 0.4856937527656555, 'learning_rate': 1.4874715261958999e-05, 'epoch': 5.13}
{'loss': 0.6443, 'grad_norm': 0.25792446732521057, 'learning_rate': 1.4589977220956721e-05, 'epoch': 5.41}
{'loss': 0.6406, 'grad_norm': 0.2429998368024826, 'learning_rate': 1.4305239179954442e-05, 'epoch': 5.69}
{'loss': 0.6447, 'grad_norm': 0.3957258462905884, 'learning_rate': 1.4020501138952165e-05, 'epoch': 5.98}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.6184056401252747, 'eval_precision': 0.27051567586088743, 'eval_recall': 0.2657354426119152, 'eval_f1': 0.26810425333220134, 'eval_accuracy': 0.8326808736092306, 'eval_runtime': 6.0353, 'eval_samples_per_second': 538.495, 'eval_steps_per_second': 67.436, 'epoch': 6.0}
{'loss': 0.6377, 'grad_norm': 0.5821410417556763, 'learning_rate': 1.3735763097949887e-05, 'epoch': 6.26}
{'loss': 0.6451, 'grad_norm': 0.3864973187446594, 'learning_rate': 1.3451025056947608e-05, 'epoch': 6.55}
{'loss': 0.6218, 'grad_norm': 0.288789302110672, 'learning_rate': 1.3166287015945332e-05, 'epoch': 6.83}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.6061442494392395, 'eval_precision': 0.28368673050615595, 'eval_recall': 0.2791989229215752, 'eval_f1': 0.28142493638676847, 'eval_accuracy': 0.8382439512568435, 'eval_runtime': 6.1302, 'eval_samples_per_second': 530.164, 'eval_steps_per_second': 66.393, 'epoch': 7.0}
{'loss': 0.6377, 'grad_norm': 0.31197211146354675, 'learning_rate': 1.2881548974943054e-05, 'epoch': 7.12}
{'loss': 0.6224, 'grad_norm': 0.2910526990890503, 'learning_rate': 1.2596810933940776e-05, 'epoch': 7.4}
{'loss': 0.6199, 'grad_norm': 0.6478071808815002, 'learning_rate': 1.2312072892938498e-05, 'epoch': 7.69}
{'loss': 0.6095, 'grad_norm': 0.34126749634742737, 'learning_rate': 1.2027334851936218e-05, 'epoch': 7.97}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5944703817367554, 'eval_precision': 0.30005000833472245, 'eval_recall': 0.30292830696735107, 'eval_f1': 0.3014822879155849, 'eval_accuracy': 0.8419968210984871, 'eval_runtime': 6.0547, 'eval_samples_per_second': 536.772, 'eval_steps_per_second': 67.22, 'epoch': 8.0}
{'loss': 0.6007, 'grad_norm': 0.3512535095214844, 'learning_rate': 1.1742596810933942e-05, 'epoch': 8.26}
{'loss': 0.6189, 'grad_norm': 0.5323229432106018, 'learning_rate': 1.1457858769931664e-05, 'epoch': 8.54}
{'loss': 0.6066, 'grad_norm': 0.22804991900920868, 'learning_rate': 1.1173120728929384e-05, 'epoch': 8.83}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.582589328289032, 'eval_precision': 0.3131099544567339, 'eval_recall': 0.3239649949511949, 'eval_f1': 0.3184449958643507, 'eval_accuracy': 0.8446459056925885, 'eval_runtime': 6.0909, 'eval_samples_per_second': 533.582, 'eval_steps_per_second': 66.821, 'epoch': 9.0}
{'loss': 0.6082, 'grad_norm': 1.0766414403915405, 'learning_rate': 1.0888382687927108e-05, 'epoch': 9.11}
{'loss': 0.6021, 'grad_norm': 0.4065483510494232, 'learning_rate': 1.060364464692483e-05, 'epoch': 9.4}
{'loss': 0.5961, 'grad_norm': 0.5232006311416626, 'learning_rate': 1.0318906605922552e-05, 'epoch': 9.68}
{'loss': 0.5934, 'grad_norm': 0.3136926591396332, 'learning_rate': 1.0034168564920275e-05, 'epoch': 9.97}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5725618004798889, 'eval_precision': 0.3304, 'eval_recall': 0.3475260854931, 'eval_f1': 0.338746719160105, 'eval_accuracy': 0.849693883558015, 'eval_runtime': 6.2953, 'eval_samples_per_second': 516.255, 'eval_steps_per_second': 64.651, 'epoch': 10.0}
{'loss': 0.5814, 'grad_norm': 0.33662763237953186, 'learning_rate': 9.749430523917997e-06, 'epoch': 10.25}
{'loss': 0.5991, 'grad_norm': 0.236420139670372, 'learning_rate': 9.464692482915719e-06, 'epoch': 10.54}
{'loss': 0.5827, 'grad_norm': 0.4665989577770233, 'learning_rate': 9.17995444191344e-06, 'epoch': 10.82}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.555459201335907, 'eval_precision': 0.349606766582579, 'eval_recall': 0.3964994951194884, 'eval_f1': 0.3715795284283574, 'eval_accuracy': 0.8574056631541768, 'eval_runtime': 6.2601, 'eval_samples_per_second': 519.162, 'eval_steps_per_second': 65.015, 'epoch': 11.0}
{'loss': 0.5827, 'grad_norm': 0.4704764187335968, 'learning_rate': 8.895216400911163e-06, 'epoch': 11.1}
{'loss': 0.5803, 'grad_norm': 0.4113923907279968, 'learning_rate': 8.610478359908885e-06, 'epoch': 11.39}
{'loss': 0.5842, 'grad_norm': 0.29213693737983704, 'learning_rate': 8.325740318906607e-06, 'epoch': 11.67}
{'loss': 0.5697, 'grad_norm': 0.2828220725059509, 'learning_rate': 8.041002277904329e-06, 'epoch': 11.96}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5435853600502014, 'eval_precision': 0.38317217491701544, 'eval_recall': 0.4468192527768428, 'eval_f1': 0.4125553570041178, 'eval_accuracy': 0.8646759286513216, 'eval_runtime': 6.0454, 'eval_samples_per_second': 537.6, 'eval_steps_per_second': 67.324, 'epoch': 12.0}
{'loss': 0.5642, 'grad_norm': 0.5388419032096863, 'learning_rate': 7.75626423690205e-06, 'epoch': 12.24}
{'loss': 0.5771, 'grad_norm': 0.3611924350261688, 'learning_rate': 7.471526195899773e-06, 'epoch': 12.53}
{'loss': 0.5689, 'grad_norm': 0.7496384382247925, 'learning_rate': 7.186788154897495e-06, 'epoch': 12.81}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5335272550582886, 'eval_precision': 0.3891377379619261, 'eval_recall': 0.46785594076068665, 'eval_f1': 0.42488155280452394, 'eval_accuracy': 0.8692235238711956, 'eval_runtime': 6.4315, 'eval_samples_per_second': 505.327, 'eval_steps_per_second': 63.283, 'epoch': 13.0}
{'loss': 0.5669, 'grad_norm': 0.4726976454257965, 'learning_rate': 6.9020501138952166e-06, 'epoch': 13.1}
{'loss': 0.5683, 'grad_norm': 0.49205562472343445, 'learning_rate': 6.617312072892939e-06, 'epoch': 13.38}
{'loss': 0.5579, 'grad_norm': 0.4790692627429962, 'learning_rate': 6.3325740318906616e-06, 'epoch': 13.67}
{'loss': 0.5601, 'grad_norm': 0.3624199628829956, 'learning_rate': 6.047835990888384e-06, 'epoch': 13.95}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5271334648132324, 'eval_precision': 0.4016842105263158, 'eval_recall': 0.48165600807808817, 'eval_f1': 0.438050049743629, 'eval_accuracy': 0.8724612939306529, 'eval_runtime': 6.177, 'eval_samples_per_second': 526.148, 'eval_steps_per_second': 65.89, 'epoch': 14.0}
{'loss': 0.5611, 'grad_norm': 0.33320072293281555, 'learning_rate': 5.763097949886105e-06, 'epoch': 14.24}
{'loss': 0.5431, 'grad_norm': 0.5229126811027527, 'learning_rate': 5.478359908883827e-06, 'epoch': 14.52}
{'loss': 0.5553, 'grad_norm': 0.5381569266319275, 'learning_rate': 5.19362186788155e-06, 'epoch': 14.81}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5203945636749268, 'eval_precision': 0.40960491414211925, 'eval_recall': 0.4937731403567822, 'eval_f1': 0.4477680274704312, 'eval_accuracy': 0.8755960440336729, 'eval_runtime': 6.0302, 'eval_samples_per_second': 538.957, 'eval_steps_per_second': 67.494, 'epoch': 15.0}
{'loss': 0.5664, 'grad_norm': 0.45702916383743286, 'learning_rate': 4.908883826879272e-06, 'epoch': 15.09}
{'loss': 0.5511, 'grad_norm': 0.8291336894035339, 'learning_rate': 4.624145785876993e-06, 'epoch': 15.38}
{'loss': 0.5535, 'grad_norm': 1.0453912019729614, 'learning_rate': 4.339407744874715e-06, 'epoch': 15.66}
{'loss': 0.5474, 'grad_norm': 0.5797114372253418, 'learning_rate': 4.054669703872437e-06, 'epoch': 15.95}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5146547555923462, 'eval_precision': 0.4094542447629548, 'eval_recall': 0.5, 'eval_f1': 0.45021973026216094, 'eval_accuracy': 0.8776858774356862, 'eval_runtime': 5.9717, 'eval_samples_per_second': 544.235, 'eval_steps_per_second': 68.155, 'epoch': 16.0}
{'loss': 0.556, 'grad_norm': 0.26725882291793823, 'learning_rate': 3.76993166287016e-06, 'epoch': 16.23}
{'loss': 0.5464, 'grad_norm': 0.8311058878898621, 'learning_rate': 3.4851936218678815e-06, 'epoch': 16.51}
{'loss': 0.546, 'grad_norm': 0.620249330997467, 'learning_rate': 3.200455580865604e-06, 'epoch': 16.8}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5108492970466614, 'eval_precision': 0.4133003572410003, 'eval_recall': 0.5062268596432178, 'eval_f1': 0.45506807866868376, 'eval_accuracy': 0.8792458939188792, 'eval_runtime': 6.0242, 'eval_samples_per_second': 539.491, 'eval_steps_per_second': 67.561, 'epoch': 17.0}
{'loss': 0.551, 'grad_norm': 0.40934082865715027, 'learning_rate': 2.9157175398633257e-06, 'epoch': 17.08}
{'loss': 0.548, 'grad_norm': 0.5092348456382751, 'learning_rate': 2.6309794988610482e-06, 'epoch': 17.37}
{'loss': 0.5372, 'grad_norm': 0.352668821811676, 'learning_rate': 2.34624145785877e-06, 'epoch': 17.65}
{'loss': 0.5434, 'grad_norm': 0.4402916729450226, 'learning_rate': 2.061503416856492e-06, 'epoch': 17.94}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5081017017364502, 'eval_precision': 0.41836173721825176, 'eval_recall': 0.5122854257825648, 'eval_f1': 0.4605840520502345, 'eval_accuracy': 0.8803791134396892, 'eval_runtime': 6.003, 'eval_samples_per_second': 541.395, 'eval_steps_per_second': 67.799, 'epoch': 18.0}
{'loss': 0.5611, 'grad_norm': 0.4256851077079773, 'learning_rate': 1.7767653758542143e-06, 'epoch': 18.22}
{'loss': 0.5421, 'grad_norm': 0.4777807891368866, 'learning_rate': 1.4920273348519363e-06, 'epoch': 18.51}
{'loss': 0.541, 'grad_norm': 0.8818345665931702, 'learning_rate': 1.2072892938496584e-06, 'epoch': 18.79}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5060338973999023, 'eval_precision': 0.41805974239517674, 'eval_recall': 0.51346348030966, 'eval_f1': 0.4608761329305136, 'eval_accuracy': 0.8807470418555365, 'eval_runtime': 6.0613, 'eval_samples_per_second': 536.192, 'eval_steps_per_second': 67.148, 'epoch': 19.0}
{'loss': 0.5448, 'grad_norm': 0.8677995800971985, 'learning_rate': 9.225512528473805e-07, 'epoch': 19.08}
{'loss': 0.5457, 'grad_norm': 0.4022636115550995, 'learning_rate': 6.378132118451026e-07, 'epoch': 19.36}
{'loss': 0.5469, 'grad_norm': 0.2670786380767822, 'learning_rate': 3.530751708428246e-07, 'epoch': 19.65}
{'loss': 0.5438, 'grad_norm': 0.37346577644348145, 'learning_rate': 6.83371298405467e-08, 'epoch': 19.93}


  0%|          | 0/407 [00:00<?, ?it/s]

{'eval_loss': 0.5054147839546204, 'eval_precision': 0.4196245032205016, 'eval_recall': 0.5153147088522383, 'eval_f1': 0.4625727018656998, 'eval_accuracy': 0.8810266674515806, 'eval_runtime': 5.9604, 'eval_samples_per_second': 545.267, 'eval_steps_per_second': 68.284, 'epoch': 20.0}
{'train_runtime': 1317.1526, 'train_samples_per_second': 213.202, 'train_steps_per_second': 26.664, 'train_loss': 0.6646072788499211, 'epoch': 20.0}


No files have been modified since last commit. Skipping to prevent empty commit.


TrainOutput(global_step=35120, training_loss=0.6646072788499211, metrics={'train_runtime': 1317.1526, 'train_samples_per_second': 213.202, 'train_steps_per_second': 26.664, 'total_flos': 6167088240555726.0, 'train_loss': 0.6646072788499211, 'epoch': 20.0})

In [27]:
# Replace this with your own checkpoint
model_checkpoint = "bert-finetuned-ner-lora"
token_classifier = pipeline(
    "token-classification", model=model_checkpoint, aggregation_strategy="simple"
)
token_classifier("My name is Jino.")

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'entity_group': 'PER',
  'score': np.float32(0.9957055),
  'word': 'Jino',
  'start': 11,
  'end': 15}]