In [None]:
!pip install datasets transformers[torch] accelerate -U

In [2]:
import numpy as np
import torch
from datasets import load_dataset
from transformers import DistilBertTokenizerFast, DistilBertForTokenClassification
from transformers import DataCollatorForTokenClassification
from transformers import TrainingArguments, Trainer
from transformers import EarlyStoppingCallback
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# 1. Load the data.
#    trust_remote_code=True lets the dataset's builder script, downloaded from
#    the Hub, execute locally.
print("1. Učitavanje podataka")
dataset = load_dataset(
    "conll2003",
    trust_remote_code=True,
)

# 2. Prepare the tokenizer (fast tokenizer; its word_ids() mapping is used
#    later to align word-level tags with sub-tokens).
print("\n2. Priprema tokenizatora")
tokenizer = DistilBertTokenizerFast.from_pretrained(
    "distilbert-base-uncased",
)

# 3. Prepare the labels: tag names as declared by the training split, plus an
#    index -> name lookup built from the same ordering.
label_list = dataset["train"].features["ner_tags"].feature.names
label_encoding_dict = dict(enumerate(label_list))

# 4. Funkcija za tokenizaciju
def tokenize_and_align_labels(examples):
    """Tokenize a batch of pre-split sentences and align the NER tags.

    Args:
        examples: Batch mapping with a "tokens" entry (one list of words per
            sentence) and a "ner_tags" entry (one list of tag ids per
            sentence, parallel to the words).

    Returns:
        The tokenizer output for the batch with an added "labels" entry.
        Each label row has one value per produced token: the word's tag id on
        the first sub-token of every word, and -100 on special tokens and on
        the remaining sub-tokens of a word.
    """
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
    )

    def _align(batch_index, word_tags):
        # Walk the token -> word mapping of one sentence and emit one label
        # per token.
        aligned = []
        last_word = None
        for current_word in tokenized_inputs.word_ids(batch_index=batch_index):
            starts_new_word = current_word is not None and current_word != last_word
            aligned.append(word_tags[current_word] if starts_new_word else -100)
            last_word = current_word
        return aligned

    tokenized_inputs["labels"] = [
        _align(row, tags) for row, tags in enumerate(examples["ner_tags"])
    ]
    return tokenized_inputs

# 5. Tokenize the dataset in batches, attaching the aligned "labels" column.
tokenized_datasets = dataset.map(
    tokenize_and_align_labels,
    batched=True,
)

# 6. Prepare the model with one output class per NER tag.
#    NOTE(review): the hyperparameter sweep further down instantiates a fresh
#    model for every configuration, so this instance is not the one that gets
#    trained - confirm whether this load is still needed.
model = DistilBertForTokenClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(label_list),
)

# 7. Funkcija za računanje metrika
def compute_metrics(pred):
    """Compute accuracy, F1, precision and recall for one evaluation pass.

    Args:
        pred: Object exposing `label_ids` (gold label ids) and `predictions`
            (per-class scores; the arg-max over the last axis is taken as the
            predicted class).

    Returns:
        Dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.

    NOTE(review): positions are scored individually and precision/recall/F1
    are averaged with average='weighted' over the classes present, i.e. these
    are token-level scores rather than entity-level ones - confirm this is
    the intended NER metric.
    """
    gold = pred.label_ids
    guessed = pred.predictions.argmax(-1)

    # Positions labelled -100 are excluded from scoring.
    scored = gold != -100
    gold, guessed = gold[scored], guessed[scored]

    weighted = precision_recall_fscore_support(gold, guessed, average='weighted')
    return {
        'accuracy': accuracy_score(gold, guessed),
        'f1': weighted[2],
        'precision': weighted[0],
        'recall': weighted[1]
    }

# 8. Funkcija za treniranje
def train_model(model, tokenized_datasets, training_args):
    """Train `model` and hand back the Trainer that ran the training.

    Args:
        model: Model instance to train.
        tokenized_datasets: Mapping whose "train" and "validation" entries are
            used for training and evaluation respectively.
        training_args: TrainingArguments controlling the run.

    Returns:
        The Trainer after `train()` has completed, so the caller can call
        `evaluate()` on it.
    """
    trainer_kwargs = {
        "model": model,
        "args": training_args,
        "train_dataset": tokenized_datasets["train"],
        "eval_dataset": tokenized_datasets["validation"],
        "tokenizer": tokenizer,
        # The collator is built from the same module-level tokenizer.
        "data_collator": DataCollatorForTokenClassification(tokenizer),
        "compute_metrics": compute_metrics,
        # Early stopping is configured with early_stopping_patience=3.
        "callbacks": [EarlyStoppingCallback(early_stopping_patience=3)],
    }

    trainer = Trainer(**trainer_kwargs)
    trainer.train()
    return trainer

# 9. Hyperparameter sweep: train a fresh model for every
#    (learning rate, batch size, epochs) combination and remember the one
#    with the highest validation F1.
learning_rates = [1e-5, 3e-5, 5e-5]
batch_sizes = [16, 32]
epochs = [3, 5]

best_f1 = 0
best_model = None
best_params = None

# Index <-> tag-name maps stored in each model's config, so saved checkpoints
# report real tag names instead of generic placeholder labels.
id2label = dict(enumerate(label_list))
label2id = {name: idx for idx, name in id2label.items()}

# The install cell upgrades transformers to the latest release, and the
# keyword selecting the evaluation schedule was renamed from
# `evaluation_strategy` to `eval_strategy`. Use whichever name the installed
# TrainingArguments actually declares so the sweep runs on both.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in getattr(TrainingArguments, "__dataclass_fields__", {})
    else "evaluation_strategy"
)

for lr in learning_rates:
    for bs in batch_sizes:
        for ep in epochs:
            print(f"\nTreniranje sa lr={lr}, batch_size={bs}, epochs={ep}")

            # Drop the previous run's trainer and model before starting the
            # next one; otherwise they stay referenced (and resident on the
            # accelerator) for the whole of the next training run. The best
            # model so far remains alive through `best_model`.
            trainer = None
            model = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            training_args = TrainingArguments(
                output_dir=f"./results_lr{lr}_bs{bs}_ep{ep}",
                num_train_epochs=ep,
                per_device_train_batch_size=bs,
                per_device_eval_batch_size=bs,
                learning_rate=lr,
                weight_decay=0.01,
                save_strategy="epoch",
                # Prune old checkpoints so twelve runs do not each leave every
                # epoch's checkpoint on disk; the best checkpoint is still
                # kept for load_best_model_at_end.
                save_total_limit=1,
                load_best_model_at_end=True,
                metric_for_best_model="f1",
                # Evaluate once per epoch (must match save_strategy for
                # load_best_model_at_end).
                **{eval_strategy_kwarg: "epoch"},
            )

            model = DistilBertForTokenClassification.from_pretrained(
                "distilbert-base-uncased",
                num_labels=len(label_list),
                id2label=id2label,
                label2id=label2id,
            )
            trainer = train_model(model, tokenized_datasets, training_args)

            # With load_best_model_at_end=True this scores the best
            # checkpoint of the run, not necessarily the final epoch.
            eval_results = trainer.evaluate()
            print(f"Evaluation results: {eval_results}")

            if eval_results["eval_f1"] > best_f1:
                best_f1 = eval_results["eval_f1"]
                best_model = model
                best_params = {"lr": lr, "batch_size": bs, "epochs": ep}

print(f"\nNajbolji model: F1 score = {best_f1}")
print(f"Najbolji parametri: {best_params}")

# 10. Saving the best model (currently disabled)
#best_model.save_pretrained("./best_ner_model")
#tokenizer.save_pretrained("./best_ner_model")

print("\nEksperiment završen!")

1. Učitavanje podataka


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script:   0%|          | 0.00/9.57k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/983k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14041 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3250 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3453 [00:00<?, ? examples/s]


2. Priprema tokenizatora


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Treniranje sa lr=1e-05, batch_size=16, epochs=3


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2925,0.073408,0.979596,0.979271,0.979536,0.979596
2,0.0604,0.058031,0.984093,0.983922,0.983852,0.984093
3,0.0431,0.056255,0.984561,0.98445,0.984385,0.984561


Evaluation results: {'eval_loss': 0.056255463510751724, 'eval_accuracy': 0.9845605700712589, 'eval_f1': 0.9844498871409514, 'eval_precision': 0.9843851155020314, 'eval_recall': 0.9845605700712589, 'eval_runtime': 4.5663, 'eval_samples_per_second': 711.738, 'eval_steps_per_second': 44.675, 'epoch': 3.0}

Treniranje sa lr=1e-05, batch_size=16, epochs=5


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2812,0.071729,0.980842,0.980552,0.980717,0.980842
2,0.0559,0.053844,0.985242,0.985065,0.984998,0.985242
3,0.0359,0.050797,0.986605,0.986504,0.986463,0.986605
4,0.0269,0.051388,0.986741,0.986726,0.986727,0.986741
5,0.0225,0.05175,0.987189,0.987146,0.987116,0.987189


Evaluation results: {'eval_loss': 0.051750294864177704, 'eval_accuracy': 0.9871889723920408, 'eval_f1': 0.987145798177078, 'eval_precision': 0.9871155288588866, 'eval_recall': 0.9871889723920408, 'eval_runtime': 4.6469, 'eval_samples_per_second': 699.386, 'eval_steps_per_second': 43.9, 'epoch': 5.0}

Treniranje sa lr=1e-05, batch_size=32, epochs=3


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.080721,0.977999,0.977519,0.977634,0.977999
2,0.259500,0.060803,0.98273,0.982452,0.982409,0.98273
3,0.067200,0.057776,0.983431,0.983264,0.983201,0.983431


Evaluation results: {'eval_loss': 0.0577763095498085, 'eval_accuracy': 0.9834313305556637, 'eval_f1': 0.9832638592699995, 'eval_precision': 0.983201049045579, 'eval_recall': 0.9834313305556637, 'eval_runtime': 4.7374, 'eval_samples_per_second': 686.03, 'eval_steps_per_second': 21.531, 'epoch': 3.0}

Treniranje sa lr=1e-05, batch_size=32, epochs=5


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.081149,0.97689,0.976448,0.976792,0.97689
2,0.251800,0.058247,0.983217,0.982917,0.982924,0.983217
3,0.064000,0.051941,0.984541,0.984369,0.984327,0.984541
4,0.043700,0.052031,0.985008,0.984962,0.984949,0.985008
5,0.033700,0.05108,0.985437,0.985328,0.985253,0.985437


Evaluation results: {'eval_loss': 0.05108022689819336, 'eval_accuracy': 0.9854367041781862, 'eval_f1': 0.9853275304340767, 'eval_precision': 0.9852532216276845, 'eval_recall': 0.9854367041781862, 'eval_runtime': 4.6849, 'eval_samples_per_second': 693.724, 'eval_steps_per_second': 21.772, 'epoch': 5.0}

Treniranje sa lr=3e-05, batch_size=16, epochs=3


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1703,0.050312,0.984833,0.984651,0.984763,0.984833
2,0.0321,0.048045,0.987442,0.987484,0.987564,0.987442
3,0.016,0.048242,0.988065,0.987996,0.987949,0.988065


Evaluation results: {'eval_loss': 0.0482424795627594, 'eval_accuracy': 0.9880651064989681, 'eval_f1': 0.9879962909540682, 'eval_precision': 0.9879486539895311, 'eval_recall': 0.9880651064989681, 'eval_runtime': 4.5133, 'eval_samples_per_second': 720.091, 'eval_steps_per_second': 45.2, 'epoch': 3.0}

Treniranje sa lr=3e-05, batch_size=16, epochs=5


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.164,0.05627,0.984015,0.98375,0.983936,0.984015
2,0.0323,0.050652,0.987442,0.9875,0.987602,0.987442
3,0.0164,0.04949,0.988065,0.987972,0.98795,0.988065
4,0.009,0.051957,0.988221,0.988174,0.988171,0.988221
5,0.0059,0.0542,0.988299,0.988226,0.988182,0.988299


Evaluation results: {'eval_loss': 0.05420003458857536, 'eval_accuracy': 0.9882987422608154, 'eval_f1': 0.9882257027699094, 'eval_precision': 0.9881819406876649, 'eval_recall': 0.9882987422608154, 'eval_runtime': 4.5467, 'eval_samples_per_second': 714.802, 'eval_steps_per_second': 44.868, 'epoch': 5.0}

Treniranje sa lr=3e-05, batch_size=32, epochs=3


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.053866,0.983353,0.983226,0.983564,0.983353
2,0.144400,0.045869,0.986994,0.986909,0.986891,0.986994
3,0.034500,0.046826,0.986955,0.986895,0.986851,0.986955


Evaluation results: {'eval_loss': 0.045868970453739166, 'eval_accuracy': 0.9869942759238347, 'eval_f1': 0.9869086416078499, 'eval_precision': 0.9868914685086827, 'eval_recall': 0.9869942759238347, 'eval_runtime': 4.8446, 'eval_samples_per_second': 670.849, 'eval_steps_per_second': 21.054, 'epoch': 3.0}

Treniranje sa lr=3e-05, batch_size=32, epochs=5


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.054146,0.982886,0.982784,0.983132,0.982886
2,0.141100,0.046345,0.987306,0.987273,0.987288,0.987306
3,0.033700,0.046819,0.987909,0.987744,0.987687,0.987909
4,0.017400,0.04906,0.987364,0.987297,0.987276,0.987364
5,0.011000,0.048668,0.988065,0.987993,0.987949,0.988065


Evaluation results: {'eval_loss': 0.04866808280348778, 'eval_accuracy': 0.9880651064989681, 'eval_f1': 0.9879929407198452, 'eval_precision': 0.9879492179377064, 'eval_recall': 0.9880651064989681, 'eval_runtime': 4.7785, 'eval_samples_per_second': 680.128, 'eval_steps_per_second': 21.346, 'epoch': 5.0}

Treniranje sa lr=5e-05, batch_size=16, epochs=3


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1374,0.053239,0.985106,0.984909,0.985072,0.985106
2,0.0279,0.047272,0.987617,0.987614,0.987635,0.987617
3,0.0115,0.048764,0.988416,0.988354,0.988314,0.988416


Evaluation results: {'eval_loss': 0.04876447841525078, 'eval_accuracy': 0.988415560141739, 'eval_f1': 0.9883537364482007, 'eval_precision': 0.9883137260383746, 'eval_recall': 0.988415560141739, 'eval_runtime': 4.5814, 'eval_samples_per_second': 709.388, 'eval_steps_per_second': 44.528, 'epoch': 3.0}

Treniranje sa lr=5e-05, batch_size=16, epochs=5


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1351,0.057043,0.984366,0.983988,0.984161,0.984366
2,0.029,0.049973,0.987968,0.987921,0.987892,0.987968
3,0.0127,0.05375,0.988124,0.98798,0.987958,0.988124
4,0.0067,0.057258,0.988007,0.987921,0.987893,0.988007
5,0.0033,0.058472,0.988357,0.988278,0.988229,0.988357


Evaluation results: {'eval_loss': 0.058472082018852234, 'eval_accuracy': 0.9883571512012772, 'eval_f1': 0.9882779343955158, 'eval_precision': 0.9882290340627261, 'eval_recall': 0.9883571512012772, 'eval_runtime': 4.7057, 'eval_samples_per_second': 690.654, 'eval_steps_per_second': 43.352, 'epoch': 5.0}

Treniranje sa lr=5e-05, batch_size=32, epochs=3


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.049621,0.985145,0.985002,0.985219,0.985145
2,0.117200,0.047344,0.987403,0.987308,0.987294,0.987403
3,0.026800,0.048019,0.987695,0.987616,0.987563,0.987695


Evaluation results: {'eval_loss': 0.04801937937736511, 'eval_accuracy': 0.9876951832093765, 'eval_f1': 0.9876155106846302, 'eval_precision': 0.9875626718491455, 'eval_recall': 0.9876951832093765, 'eval_runtime': 4.9043, 'eval_samples_per_second': 662.679, 'eval_steps_per_second': 20.798, 'epoch': 3.0}

Treniranje sa lr=5e-05, batch_size=32, epochs=5


Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.051374,0.984405,0.984291,0.984657,0.984405
2,0.114600,0.045423,0.988124,0.988043,0.988032,0.988124
3,0.026800,0.04747,0.987715,0.987562,0.987504,0.987715
4,0.011900,0.052078,0.988085,0.988005,0.987987,0.988085
5,0.006800,0.052267,0.988357,0.988286,0.988246,0.988357


Evaluation results: {'eval_loss': 0.052266668528318405, 'eval_accuracy': 0.9883571512012772, 'eval_f1': 0.9882856765854345, 'eval_precision': 0.988246201386865, 'eval_recall': 0.9883571512012772, 'eval_runtime': 4.7571, 'eval_samples_per_second': 683.187, 'eval_steps_per_second': 21.442, 'epoch': 5.0}

Najbolji model: F1 score = 0.9883537364482007
Najbolji parametri: {'lr': 5e-05, 'batch_size': 16, 'epochs': 3}

Eksperiment završen!
