In [48]:
!pip install transformers datasets seqeval



In [49]:
!pip install transformers datasets seqeval accelerate peft




In [50]:
!pip install transformers datasets seqeval accelerate peft shap lime




In [51]:
import pandas as pd
import numpy as np
from datasets import Dataset, DatasetDict
from transformers import (AutoTokenizer, AutoModelForTokenClassification, Trainer, TrainingArguments, pipeline)
from transformers import DataCollatorForTokenClassification
from peft import LoraConfig, get_peft_model
from seqeval.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, f1_score

# Parse CoNLL formatted data
def parse_conll(file_path):
    sentences, labels = [], []
    with open(file_path, 'r', encoding='utf-8') as file:
        words, tags = [], []
        for line in file:
            if line.strip() == "":
                if words:
                    sentences.append(words)
                    labels.append(tags)
                    words, tags = [], []
            else:
                word, tag = line.strip().split()
                words.append(word)
                tags.append(tag)
        if words:
            sentences.append(words)
            labels.append(tags)
    return sentences, labels

In [52]:
# Load data
sentences, labels = parse_conll("/content/drive/MyDrive/Colab Notebooks/EthioMart/labeled_telegram_product_price_location.txt-")
dataset = Dataset.from_dict({"tokens": sentences, "ner_tags": labels})

In [53]:
# Create label mappings
label_list = list(set(tag for sublist in labels for tag in sublist))
label_to_id = {label: i for i, label in enumerate(label_list)}
id_to_label = {i: label for label, i in label_to_id.items()}


In [54]:
# Load model and tokenizer
model_name = "rasyosef/bert-tiny-amharic"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(
    model_name, num_labels=len(label_list), id2label=id_to_label, label2id=label_to_id
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [55]:
# Print the model architecture
print(model)

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28672, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-1): 2 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=128, out_features=128, bias=True)
              (key): Linear(in_features=128, out_features=128, bias=True)
              (value): Linear(in_features=128, out_features=128, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=128, out_features=128, bias=True)
              (LayerNorm): LayerNorm((128,), eps=1e-12, 

In [56]:
# Apply LoRA for PEFT (Parameter Efficient Fine-Tuning)
peft_config = LoraConfig(
    task_type="TOKEN_CLS",
    r=16,
    lora_alpha=32,
    target_modules=[
        "attention.self.query", "attention.self.key", "attention.self.value",
        "intermediate.dense", "output.dense"
    ],
    lora_dropout=0.1,
)

# Apply PEFT model
model = get_peft_model(model, peft_config)

In [57]:
# Tokenize and align labels
def tokenize_and_align_labels(batch):
    tokenized = tokenizer(batch["tokens"], truncation=True, is_split_into_words=True)
    labels = []
    for i, label in enumerate(batch["ner_tags"]):
        word_ids = tokenized.word_ids(batch_index=i)
        aligned_labels = [-100 if idx is None else label_to_id[label[idx]] for idx in word_ids]
        labels.append(aligned_labels)
    tokenized["labels"] = labels
    return tokenized

tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/13000 [00:00<?, ? examples/s]

In [58]:
# Split dataset
train_valid_split = tokenized_dataset.train_test_split(test_size=0.2)
tokenized_dataset = DatasetDict({
    "train": train_valid_split["train"],
    "validation": train_valid_split["test"]
})

In [59]:
# Set training arguments
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

In [60]:
# Define data collator
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

# Define custom metrics function
def compute_metrics(pred):
    predictions, labels = pred
    preds = np.argmax(predictions, axis=2)

    # Flatten the labels and predictions
    true_labels = [l for label in labels for l in label if l != -100]
    true_preds = [p for pred, label in zip(preds, labels) for p, l in zip(pred, label) if l != -100]

    precision = precision_score(true_labels, true_preds, average='micro')  # or 'macro' depending on your needs
    recall = recall_score(true_labels, true_preds, average='micro')
    f1 = f1_score(true_labels, true_preds, average='micro')

    return {"precision": precision, "recall": recall, "f1": f1}

In [61]:
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [62]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.8306,0.434163,0.889204,0.889204,0.889204
2,0.4174,0.341042,0.91465,0.91465,0.91465
3,0.354,0.310364,0.919934,0.919934,0.919934
4,0.3067,0.292236,0.922168,0.922168,0.922168
5,0.2983,0.286486,0.923054,0.923054,0.923054


TrainOutput(global_step=3250, training_loss=0.4118293903057392, metrics={'train_runtime': 2972.3651, 'train_samples_per_second': 17.494, 'train_steps_per_second': 1.093, 'total_flos': 53156797464768.0, 'train_loss': 0.4118293903057392, 'epoch': 5.0})

In [63]:
# Evaluate the model
predictions, labels, _ = trainer.predict(tokenized_dataset["validation"])
preds = np.argmax(predictions, axis=2)
true_labels = [[id_to_label[l] for l in label if l != -100] for label in labels]
true_preds = [[id_to_label[p] for p, l in zip(pred, label) if l != -100] for pred, label in zip(preds, labels)]
print(classification_report(true_labels, true_preds))


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         LOC       0.00      0.00      0.00      1476
       PRICE       0.00      0.00      0.00      4084
     PRODUCT       0.70      0.77      0.74     15914

   micro avg       0.70      0.57      0.63     21474
   macro avg       0.23      0.26      0.25     21474
weighted avg       0.52      0.57      0.55     21474



In [64]:
# Save the fine-tuned model
model.save_pretrained("./fine_tuned_ner_model")
tokenizer.save_pretrained("./fine_tuned_ner_model")

('./fine_tuned_ner_model/tokenizer_config.json',
 './fine_tuned_ner_model/special_tokens_map.json',
 './fine_tuned_ner_model/vocab.txt',
 './fine_tuned_ner_model/added_tokens.json',
 './fine_tuned_ner_model/tokenizer.json')

In [65]:
# Inference pipeline
nlp = pipeline("ner", model="./fine_tuned_ner_model", tokenizer="./fine_tuned_ner_model", grouped_entities=True)
test_sentence = "አዲስ አበባ እንጀራ በ 30 ብር ይሸጣል።"
print(nlp(test_sentence))

Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu


[{'entity_group': 'LABEL_0', 'score': 0.5377132, 'word': 'አዲስ', 'start': 0, 'end': 3}, {'entity_group': 'LABEL_1', 'score': 0.5103878, 'word': 'አበባ እንጀራ', 'start': 4, 'end': 12}, {'entity_group': 'LABEL_0', 'score': 0.6306219, 'word': 'በ 30 ብር ይሸጣል ።', 'start': 13, 'end': 26}]




In [86]:
# Model Interpretability with SHAP and LIME
import shap
from lime.lime_text import LimeTextExplainer
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification

# Example sentence
test_sentence = "አዲስ አበባ እንጀራ በ 30 ብር ይሸጣል"

# Load the model and tokenizer (ensure model is correctly fine-tuned)
model = AutoModelForTokenClassification.from_pretrained("/content/fine_tuned_ner_model")
tokenizer = AutoTokenizer.from_pretrained("/content/fine_tuned_ner_model")

# Inference pipeline (NER)
nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)
print(nlp(test_sentence))

# SHAP example for interpreting predictions
# Define the function to properly tokenize input for SHAP
def shap_tokenizer_input(texts):
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    # Return both input_ids and attention_mask as SHAP expects them
    return {'input_ids': inputs['input_ids'], 'attention_mask': inputs['attention_mask']}

# SHAP explainer
explainer = shap.Explainer(model, shap_tokenizer_input)

# Pass test_sentence as a list (SHAP expects a list of sentences)
shap_values = explainer([test_sentence])  # Pass input as a list of sentences
shap.plots.text(shap_values[0])

# LIME example for interpreting predictions
# Define the function to map the model output to probabilities (you can customize it as needed)
def nlp(texts):
    inputs = tokenizer(texts, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**inputs).logits
    return torch.nn.functional.softmax(logits, dim=-1).cpu().numpy()

# Initialize LIME explainer
explainer = LimeTextExplainer(class_names=["label1", "label2", "label3"])
lime_exp = explainer.explain_instance(test_sentence, nlp)
lime_exp.show_in_notebook()




Some weights of BertForTokenClassification were not initialized from the model checkpoint at rasyosef/bert-tiny-amharic and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: Error(s) in loading state_dict for BertForTokenClassification:
	size mismatch for classifier.modules_to_save.default.weight: copying a param with shape torch.Size([5, 128]) from checkpoint, the shape in current model is torch.Size([2, 128]).
	size mismatch for classifier.modules_to_save.default.bias: copying a param with shape torch.Size([5]) from checkpoint, the shape in current model is torch.Size([2]).