In [1]:
# Install necessary libraries
!pip install transformers==4.41.2 peft==0.10.0 datasets seqeval accelerate


Collecting transformers==4.41.2
  Downloading transformers-4.41.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.8/43.8 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting peft==0.10.0
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.2)
  Downloading tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft==0.10.0)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft==0.10.0)
  Downloading nvidia

In [13]:
from google.colab import files
uploaded = files.upload()  # Upload conll_raw_sample.txt


Saving conll_raw_sample.txt to conll_raw_sample (1).txt


In [14]:
def read_conll(file_path):
    sentences = []
    tokens, labels = [], []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                if tokens:
                    sentences.append((tokens, labels))
                    tokens, labels = [], []
            else:
                splits = line.split()
                if len(splits) >= 2:
                    tokens.append(splits[0])
                    labels.append(splits[1])
    if tokens:
        sentences.append((tokens, labels))
    return sentences

data = read_conll("conll_raw_sample.txt")


In [15]:
from datasets import Dataset

tokens = [x[0] for x in data]
ner_tags = [x[1] for x in data]

label_list = sorted(set(tag for seq in ner_tags for tag in seq))
label2id = {l: i for i, l in enumerate(label_list)}
id2label = {i: l for l, i in label2id.items()}

tag_ids = [[label2id[tag] for tag in seq] for seq in ner_tags]

dataset = Dataset.from_dict({"tokens": tokens, "ner_tags": tag_ids})
dataset = dataset.train_test_split(test_size=0.2)


In [16]:
from transformers import AutoTokenizer, AutoModelForTokenClassification

model_name = "rasyosef/bert-tiny-amharic"  # Or use your preferred model
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(label_list),
    id2label=id2label,
    label2id=label2id,
)


Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
def tokenize_and_align_labels(example):
    tokenized_inputs = tokenizer(
        example["tokens"],
        truncation=True,
        padding='max_length',  # ✅ Add this
        is_split_into_words=True,
        max_length=128         # ✅ Optionally limit sequence length
    )

    labels = []
    word_ids = tokenized_inputs.word_ids()
    prev_word_id = None

    for word_id in word_ids:
        if word_id is None:
            labels.append(-100)
        elif word_id != prev_word_id:
            labels.append(example["ner_tags"][word_id] if word_id < len(example["ner_tags"]) else -100)
        else:
            labels.append(example["ner_tags"][word_id] if word_id < len(example["ner_tags"]) else -100)
        prev_word_id = word_id

    tokenized_inputs["labels"] = labels
    return tokenized_inputs


In [28]:
tokenized_datasets = dataset.map(tokenize_and_align_labels, batched=False)

Map:   0%|          | 0/32 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

In [29]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./amharic-ner-results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
)




In [30]:
from seqeval.metrics import classification_report
import numpy as np

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_labels = [[id2label[l] for l in label if l != -100] for label in labels]
    true_predictions = [
        [id2label[pred] for pred, l in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    return classification_report(true_labels, true_predictions, output_dict=True)


In [31]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


Epoch,Training Loss,Validation Loss,Loc,Price,Product,Unnamed: 6,Micro avg,Macro avg,Weighted avg
1,No log,2.072522,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}","{'precision': 0.004048582995951417, 'recall': 0.09090909090909091, 'f1-score': 0.007751937984496124, 'support': 11}","{'precision': 0.029333333333333333, 'recall': 0.4074074074074074, 'f1-score': 0.05472636815920398, 'support': 27}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.018018018018018018, 'recall': 0.2727272727272727, 'f1-score': 0.033802816901408454, 'support': 44}","{'precision': 0.008345479082321187, 'recall': 0.12457912457912457, 'f1-score': 0.015619576535925026, 'support': 44}","{'precision': 0.019012145748987856, 'recall': 0.2727272727272727, 'f1-score': 0.03552007404836283, 'support': 44}"
2,No log,2.029143,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}","{'precision': 0.00425531914893617, 'recall': 0.09090909090909091, 'f1-score': 0.008130081300813007, 'support': 11}","{'precision': 0.03081232492997199, 'recall': 0.4074074074074074, 'f1-score': 0.05729166666666667, 'support': 27}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.01904761904761905, 'recall': 0.2727272727272727, 'f1-score': 0.03560830860534125, 'support': 44}","{'precision': 0.00876691101972704, 'recall': 0.12457912457912457, 'f1-score': 0.01635543699186992, 'support': 44}","{'precision': 0.019971392812444127, 'recall': 0.2727272727272727, 'f1-score': 0.037188770325203256, 'support': 44}"
3,2.062400,1.998097,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}","{'precision': 0.0045871559633027525, 'recall': 0.09090909090909091, 'f1-score': 0.008733624454148473, 'support': 11}","{'precision': 0.03323262839879154, 'recall': 0.4074074074074074, 'f1-score': 0.06145251396648045, 'support': 27}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.020512820512820513, 'recall': 0.2727272727272727, 'f1-score': 0.03815580286168522, 'support': 44}","{'precision': 0.009454946090523574, 'recall': 0.12457912457912457, 'f1-score': 0.017546534605157232, 'support': 44}","{'precision': 0.02153953823553868, 'recall': 0.2727272727272727, 'f1-score': 0.03989290332024103, 'support': 44}"
4,2.062400,1.978882,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}","{'precision': 0.0049504950495049506, 'recall': 0.09090909090909091, 'f1-score': 0.009389671361502348, 'support': 11}","{'precision': 0.031746031746031744, 'recall': 0.37037037037037035, 'f1-score': 0.05847953216374269, 'support': 27}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.019927536231884056, 'recall': 0.25, 'f1-score': 0.03691275167785235, 'support': 44}","{'precision': 0.009174131698884173, 'recall': 0.11531986531986532, 'f1-score': 0.016967300881311258, 'support': 44}","{'precision': 0.02071814324289572, 'recall': 0.25, 'f1-score': 0.03823258530449042, 'support': 44}"
5,1.986200,1.971498,"{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}","{'precision': 0.005025125628140704, 'recall': 0.09090909090909091, 'f1-score': 0.009523809523809525, 'support': 11}","{'precision': 0.03257328990228013, 'recall': 0.37037037037037035, 'f1-score': 0.05988023952095808, 'support': 27}","{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}","{'precision': 0.02029520295202952, 'recall': 0.25, 'f1-score': 0.03754266211604095, 'support': 44}","{'precision': 0.009399603882605209, 'recall': 0.11531986531986532, 'f1-score': 0.0173510122611919, 'support': 44}","{'precision': 0.02124443657434344, 'recall': 0.25, 'f1-score': 0.039125644814267564, 'support': 44}"


Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}" of type <class 'dict'> for key "eval/LOC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.004048582995951417, 'recall': 0.09090909090909091, 'f1-score': 0.007751937984496124, 'support': 11}" of type <class 'dict'> for key "eval/PRICE" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.029333333333333333, 'recall': 0.4074074074074074, 'f1-score': 0.05472636815920398, 'support': 27}" of type <class 'dict'> for key "eval/Product" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}" of type <clas

TrainOutput(global_step=20, training_loss=2.0243043899536133, metrics={'train_runtime': 11.0464, 'train_samples_per_second': 14.484, 'train_steps_per_second': 1.811, 'total_flos': 48885596160.0, 'train_loss': 2.0243043899536133, 'epoch': 5.0})

In [32]:
results = trainer.evaluate()
print("Validation Results:", results)

# Save model and tokenizer locally
model.save_pretrained("./amharic-ner-model")
tokenizer.save_pretrained("./amharic-ner-model")

Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}" of type <class 'dict'> for key "eval/LOC" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.005025125628140704, 'recall': 0.09090909090909091, 'f1-score': 0.009523809523809525, 'support': 11}" of type <class 'dict'> for key "eval/PRICE" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.03257328990228013, 'recall': 0.37037037037037035, 'f1-score': 0.05988023952095808, 'support': 27}" of type <class 'dict'> for key "eval/Product" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "{'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}" of type <clas

Validation Results: {'eval_loss': 1.9714980125427246, 'eval_LOC': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'eval_PRICE': {'precision': 0.005025125628140704, 'recall': 0.09090909090909091, 'f1-score': 0.009523809523809525, 'support': 11}, 'eval_Product': {'precision': 0.03257328990228013, 'recall': 0.37037037037037035, 'f1-score': 0.05988023952095808, 'support': 27}, 'eval__': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 1}, 'eval_micro avg': {'precision': 0.02029520295202952, 'recall': 0.25, 'f1-score': 0.03754266211604095, 'support': 44}, 'eval_macro avg': {'precision': 0.009399603882605209, 'recall': 0.11531986531986532, 'f1-score': 0.0173510122611919, 'support': 44}, 'eval_weighted avg': {'precision': 0.02124443657434344, 'recall': 0.25, 'f1-score': 0.039125644814267564, 'support': 44}, 'eval_runtime': 0.0966, 'eval_samples_per_second': 82.829, 'eval_steps_per_second': 10.354, 'epoch': 5.0}


('./amharic-ner-model/tokenizer_config.json',
 './amharic-ner-model/special_tokens_map.json',
 './amharic-ner-model/vocab.txt',
 './amharic-ner-model/added_tokens.json',
 './amharic-ner-model/tokenizer.json')