In [57]:
!pip install transformers datasets seqeval



In [58]:
!pip install transformers datasets seqeval accelerate peft




In [59]:
!pip install transformers datasets seqeval accelerate peft shap lime




In [60]:
import pandas as pd
import numpy as np
from datasets import Dataset, DatasetDict
from transformers import (AutoTokenizer, AutoModelForTokenClassification, Trainer, TrainingArguments, pipeline)
from transformers import DataCollatorForTokenClassification
from peft import LoraConfig, get_peft_model
from seqeval.metrics import classification_report

# Parse CoNLL formatted data
def parse_conll(file_path):
    sentences, labels = [], []
    with open(file_path, 'r', encoding='utf-8') as file:
        words, tags = [], []
        for line in file:
            if line.strip() == "":
                if words:
                    sentences.append(words)
                    labels.append(tags)
                    words, tags = [], []
            else:
                word, tag = line.strip().split()
                words.append(word)
                tags.append(tag)
        if words:
            sentences.append(words)
            labels.append(tags)
    return sentences, labels

In [61]:
# Load data
sentences, labels = parse_conll("/content/drive/MyDrive/Colab Notebooks/EthioMart/labeled_telegram_product_price_location.txt-")
dataset = Dataset.from_dict({"tokens": sentences, "ner_tags": labels})

In [62]:
# Create label mappings
label_list = list(set(tag for sublist in labels for tag in sublist))
label_to_id = {label: i for i, label in enumerate(label_list)}
id_to_label = {i: label for label, i in label_to_id.items()}


In [63]:
# Load model and tokenizer
model_name = "rasyosef/bert-tiny-amharic"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(
    model_name, num_labels=len(label_list), id2label=id_to_label, label2id=label_to_id
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [64]:
# Print the model architecture
print(model)

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28672, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-12, 

In [65]:
# Apply LoRA for PEFT (Parameter Efficient Fine-Tuning)
peft_config = LoraConfig(
    task_type="TOKEN_CLS",
    r=16,
    lora_alpha=32,
    target_modules=[
        "attention.self.query", "attention.self.key", "attention.self.value",
        "intermediate.dense", "output.dense"
    ],
    lora_dropout=0.1,
)

# Apply PEFT model
model = get_peft_model(model, peft_config)

In [66]:
# Tokenize and align labels
def tokenize_and_align_labels(batch):
    tokenized = tokenizer(batch["tokens"], truncation=True, is_split_into_words=True)
    labels = []
    for i, label in enumerate(batch["ner_tags"]):
        word_ids = tokenized.word_ids(batch_index=i)
        aligned_labels = [-100 if idx is None else label_to_id[label[idx]] for idx in word_ids]
        labels.append(aligned_labels)
    tokenized["labels"] = labels
    return tokenized

tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/13000 [00:00<?, ? examples/s]

In [67]:
# Split dataset
train_valid_split = tokenized_dataset.train_test_split(test_size=0.2)
tokenized_dataset = DatasetDict({
    "train": train_valid_split["train"],
    "validation": train_valid_split["test"]
})

In [68]:
# Set training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

In [69]:
# Define data collator
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Define custom metrics function
def compute_metrics(pred):
    predictions, labels = pred
    preds = np.argmax(predictions, axis=2)
    true_labels = [[id_to_label[l] for l in label if l != -100] for label in labels]
    true_preds = [[id_to_label[p] for p, l in zip(pred, label) if l != -100] for pred, label in zip(preds, labels)]

    precision = precision_score(true_labels, true_preds)
    recall = recall_score(true_labels, true_preds)
    f1 = f1_score(true_labels, true_preds)

    return {"precision": precision, "recall": recall, "f1": f1}

In [70]:
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [71]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.8326,0.466169,0.890944,0.280386,0.426537
2,0.4319,0.353733,0.65433,0.438018,0.524757
3,0.3681,0.339303,0.669837,0.475598,0.556248


TrainOutput(global_step=1950, training_loss=0.4983121118790064, metrics={'train_runtime': 1860.0659, 'train_samples_per_second': 16.774, 'train_steps_per_second': 1.048, 'total_flos': 31897403097216.0, 'train_loss': 0.4983121118790064, 'epoch': 3.0})

In [72]:
# Evaluate the model
predictions, labels, _ = trainer.predict(tokenized_dataset["validation"])
preds = np.argmax(predictions, axis=2)
true_labels = [[id_to_label[l] for l in label if l != -100] for label in labels]
true_preds = [[id_to_label[p] for p, l in zip(pred, label) if l != -100] for pred, label in zip(preds, labels)]
print(classification_report(true_labels, true_preds))


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         LOC       0.00      0.00      0.00      1476
       PRICE       0.00      0.00      0.00      4084
     PRODUCT       0.67      0.64      0.66     15914

   micro avg       0.67      0.48      0.56     21474
   macro avg       0.22      0.21      0.22     21474
weighted avg       0.50      0.48      0.49     21474



In [73]:
# Save the fine-tuned model
model.save_pretrained("./fine_tuned_ner_model")
tokenizer.save_pretrained("./fine_tuned_ner_model")

('./fine_tuned_ner_model/tokenizer_config.json',
 './fine_tuned_ner_model/special_tokens_map.json',
 './fine_tuned_ner_model/vocab.txt',
 './fine_tuned_ner_model/added_tokens.json',
 './fine_tuned_ner_model/tokenizer.json')

In [74]:
# Inference pipeline
nlp = pipeline("ner", model="./fine_tuned_ner_model", tokenizer="./fine_tuned_ner_model", grouped_entities=True)
test_sentence = "አዲስ አበባ እንጀራ በ 30 ብር ይሸጣል።"
print(nlp(test_sentence))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu


[{'entity_group': 'LABEL_0', 'score': 0.6253024, 'word': 'አዲስ አበባ እንጀራ በ 30 ብር', 'start': 0, 'end': 20}, {'entity_group': 'LABEL_1', 'score': 0.51445895, 'word': 'ይሸ', 'start': 21, 'end': 23}, {'entity_group': 'LABEL_0', 'score': 0.5992029, 'word': '##ጣል ።', 'start': 23, 'end': 26}]




In [75]:
# Model Comparison (XLM-Roberta, DistilBERT, and mBERT)
model_names = ["Davlan/afro-xlmr-base", "bert-base-multilingual-cased", "distilbert-base-multilingual-cased"]
results = []
for model_name in model_names:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(
        model_name, num_labels=len(label_list), id2label=id_to_label, label2id=label_to_id
    )
    model = get_peft_model(model, peft_config)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset["train"],
        eval_dataset=tokenized_dataset["validation"],
        tokenizer=tokenizer,
    )
    trainer.train()
    predictions, labels, _ = trainer.predict(tokenized_dataset["validation"])
    preds = np.argmax(predictions, axis=2)
    true_preds = [[id_to_label[p] for p, l in zip(pred, label) if l != -100] for pred, label in zip(preds, labels)]
    f1 = classification_report(true_labels, true_preds, output_dict=True)["macro avg"]["f1-score"]
    results.append((model_name, f1))

print("Model Comparison Results:")
print(results)


Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at Davlan/afro-xlmr-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`labels` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [None]:
# Model Interpretability with SHAP and LIME
import shap
from lime.lime_text import LimeTextExplainer

# SHAP example for interpreting predictions
explainer = shap.Explainer(model, tokenizer)
shap_values = explainer([test_sentence])
shap.plots.text(shap_values[0])

# LIME example for interpreting predictions
explainer = LimeTextExplainer(class_names=label_list)
lime_exp = explainer.explain_instance(test_sentence, nlp)
lime_exp.show_in_notebook()