#Fine-Tuning an NER Model for Amharic Text

##Setup Environment

Install Libraries:


In [1]:
!pip install transformers datasets seqeval -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone


##Load Dataset

In [2]:
from datasets import load_from_disk

dataset = load_from_disk("/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/data/hf_ner_dataset")
dataset = dataset.train_test_split(test_size=0.2, seed=42)
dataset

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 8
    })
    test: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 2
    })
})

##Load the Tokenizer and Model
**I use the xlm-roberta.**
This is a public multilingual NER model trained on low-resource languages including African ones.

In [3]:
from transformers import AutoTokenizer, AutoModelForTokenClassification

model_checkpoint = "xlm-roberta-base"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


##Preprocess Labels for Token Classification

In [4]:
label_list = list(set(label for row in dataset['train']['ner_tags'] for label in row))
label_list.sort()
label_to_id = {l: i for i, l in enumerate(label_list)}
id_to_label = {i: l for l, i in label_to_id.items()}

def encode_tags(example):
    return {'labels': [label_to_id[tag] for tag in example['ner_tags']]}

dataset = dataset.map(encode_tags)


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [5]:
dataset["train"][0]

{'tokens': ['Threelayer',
  'Baby',
  'Milk',
  'Powder',
  'Container',
  '__',
  'High',
  'Quality',
  '__',
  '__',
  'Three',
  'Layer',
  'NoSpill',
  'Baby',
  'Feeding',
  'Milk',
  'Powder',
  'Food',
  'Dispenser',
  'A',
  'perfect',
  'storage',
  'for',
  'travel',
  'or',
  'home',
  'use__',
  '__እናት',
  'ልጇን',
  'ይዛ',
  'የተለያየ',
  'ቦታ',
  'ስትንቀሳቀስ',
  'የዱቄት',
  'ወተት',
  'የመሳሰሉትን',
  'አስፈላጊ',
  'የልጆች',
  'ምግብ',
  'ይዞ',
  'ለመንቀሳቀስ',
  'የሚረዳ',
  '3',
  'ፓርቲሽን',
  'ያለው',
  'አሪፍ',
  'ኮንቴነር__',
  'ዋጋ፦',
  '500ብር',
  'ውስን',
  'ፍሬ',
  'ነው',
  'ያለው',
  'አድራሻ',
  'መገናኛ_መሰረት_ደፋር_ሞል_ሁለተኛ_ፎቅ',
  'ቢሮ',
  'ቁ',
  'S05S06',
  '0902660722',
  '0928460606',
  'በTelegram',
  'ለማዘዝ',
  'ይጠቀሙ',
  'ለተጨማሪ',
  'ማብራሪያ',
  'የቴሌግራም',
  'ገፃችን'],
 'ner_tags': ['B-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'O',
  'O',
  'O',
  'O',
  'O',
  'B-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'I-Product',
  'I-Product',


In [None]:
!pip install -U transformers



In [6]:
import transformers
print(transformers.__version__)


4.52.4


In [7]:
from transformers import TrainingArguments


In [8]:
from transformers import DataCollatorForTokenClassification

def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True, padding=True)
    labels = []
    for i, label in enumerate(examples["labels"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        label_ids = []
        previous_word_idx = None
        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            else:
                label_ids.append(label[word_idx])
            previous_word_idx = word_idx
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

tokenized_dataset = dataset.map(tokenize_and_align_labels, batched=True)
data_collator = DataCollatorForTokenClassification(tokenizer)


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [9]:
print(label_list)
print(f"# Labesls in dataset: {len(label_list)}")

['B-CONTACT', 'B-LOC', 'B-PRICE', 'B-Product', 'I-LOC', 'I-Product', 'O']
# Labesls in dataset: 7


##Initialize Model and Trainer

The pretrained model you loaded ```(Davlan/xlm-roberta-base-ner-hrl) ```has 9 NER labels.
so I've included ```ignore_mismatched_sizes=True ```

In [18]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

# 1. Load base model (not pre-trained NER)
model_checkpoint = "xlm-roberta-base"
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)

# 2. Updated training arguments
training_args = TrainingArguments(
    output_dir="results",
    eval_strategy="epoch",  # or "steps" with eval_steps=500
    learning_rate=2e-5,
    warmup_steps=500,
    per_device_train_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_steps=10,  # Log loss every 10 steps
    log_level="info",
)

# 3. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 4. Train
trainer.train()


eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/e73636d4f797dec63c3081bb6ed5c7b0bb3f2089/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "B-CONTACT",
    "1": "B-LOC",
    "2": "B-PRICE",
    "3": "B-Product",
    "4": "I-LOC",
    "5": "I-Product",
    "6": "O"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-CONTACT": 0,
    "B-LOC": 1,
    "B-PRICE": 2,
    "B-Product": 3,
    "I-LOC": 4,
    "I-Product": 5,
    "O": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "positi

Epoch,Training Loss,Validation Loss
1,No log,1.740428
2,No log,1.740213
3,No log,1.739782
4,No log,1.739135
5,No log,1.738269


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.


Evaluation Results: {'eval_loss': 1.738269329071045, 'eval_runtime': 0.6279, 'eval_samples_per_second': 3.185, 'eval_steps_per_second': 1.593, 'epoch': 5.0}


##Add Metrics Computation

In [11]:
from seqeval.metrics import f1_score, precision_score, recall_score
import numpy as np

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored index (special tokens)
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    return {
        "precision": precision_score(true_labels, true_predictions),
        "recall": recall_score(true_labels, true_predictions),
        "f1": f1_score(true_labels, true_predictions),
    }

In [19]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

# 1. Load base model (not pre-trained NER)
model_checkpoint = "xlm-roberta-base"
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)

# 2. Updated training arguments
training_args = TrainingArguments(
    output_dir="results",
    eval_strategy="epoch",  # or "steps" with eval_steps=500
    learning_rate=2e-5,
    warmup_steps=500,
    per_device_train_batch_size=8,
    num_train_epochs=7,
    weight_decay=0.01,
    logging_steps=10,  # Log loss every 10 steps
    log_level="info",
)

# 3. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics, #this one is added
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 4. Train
trainer.train()


eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/e73636d4f797dec63c3081bb6ed5c7b0bb3f2089/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "B-CONTACT",
    "1": "B-LOC",
    "2": "B-PRICE",
    "3": "B-Product",
    "4": "I-LOC",
    "5": "I-Product",
    "6": "O"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-CONTACT": 0,
    "B-LOC": 1,
    "B-PRICE": 2,
    "B-Product": 3,
    "I-LOC": 4,
    "I-Product": 5,
    "O": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "positi

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.208729,0.0,0.0,0.0,0.066667
2,No log,2.208469,0.0,0.0,0.0,0.066667
3,No log,2.207953,0.0,0.0,0.0,0.066667
4,No log,2.207182,0.0,0.0,0.0,0.066667
5,No log,2.20615,0.0,0.0,0.0,0.066667
6,No log,2.204859,0.0,0.0,0.0,0.066667
7,No log,2.203317,0.0,0.0,0.0,0.066667


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassif

Evaluation Results: {'eval_loss': 2.2033169269561768, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.06666666666666667, 'eval_runtime': 0.4239, 'eval_samples_per_second': 4.719, 'eval_steps_per_second': 2.359, 'epoch': 7.0}


In [14]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

# 1. Load base model (not pre-trained NER)
model_checkpoint = "xlm-roberta-base"
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)

# 2. Updated training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/results",
    eval_strategy="epoch",  # or "steps" with eval_steps=500
    learning_rate=2e-5,
    warmup_steps=500,
    per_device_train_batch_size=8,
    num_train_epochs=8,
    weight_decay=0.01,
    logging_steps=10,  # Log loss every 10 steps
    log_level="info",
)

# 3. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics, #this one is added
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 4. Train
trainer.train()


eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/e73636d4f797dec63c3081bb6ed5c7b0bb3f2089/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "B-CONTACT",
    "1": "B-LOC",
    "2": "B-PRICE",
    "3": "B-Product",
    "4": "I-LOC",
    "5": "I-Product",
    "6": "O"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-CONTACT": 0,
    "B-LOC": 1,
    "B-PRICE": 2,
    "B-Product": 3,
    "I-LOC": 4,
    "I-Product": 5,
    "O": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "positi

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.667598,0.029412,0.090909,0.044444,0.522222
2,No log,1.66736,0.029412,0.090909,0.044444,0.522222
3,No log,1.666888,0.029412,0.090909,0.044444,0.522222
4,No log,1.666182,0.029412,0.090909,0.044444,0.522222
5,No log,1.665239,0.029412,0.090909,0.044444,0.522222
6,No log,1.664061,0.030303,0.090909,0.045455,0.511111
7,No log,1.66265,0.030303,0.090909,0.045455,0.511111
8,No log,1.661006,0.033333,0.090909,0.04878,0.533333


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.


Evaluation Results: {'eval_loss': 1.661006212234497, 'eval_precision': 0.03333333333333333, 'eval_recall': 0.09090909090909091, 'eval_f1': 0.04878048780487805, 'eval_accuracy': 0.5333333333333333, 'eval_runtime': 0.7351, 'eval_samples_per_second': 2.721, 'eval_steps_per_second': 1.36, 'epoch': 8.0}


In [15]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

# 1. Load base model (not pre-trained NER)
model_checkpoint = "xlm-roberta-base"
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)

# 2. Updated training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/results",
    eval_strategy="epoch",  # or "steps" with eval_steps=500
    learning_rate=2e-5,
    warmup_steps=500,
    per_device_train_batch_size=8,
    num_train_epochs=9,
    weight_decay=0.01,
    logging_steps=10,  # Log loss every 10 steps
    log_level="info",
)

# 3. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics, #this one is added
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 4. Train
trainer.train()


eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/e73636d4f797dec63c3081bb6ed5c7b0bb3f2089/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "B-CONTACT",
    "1": "B-LOC",
    "2": "B-PRICE",
    "3": "B-Product",
    "4": "I-LOC",
    "5": "I-Product",
    "6": "O"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-CONTACT": 0,
    "B-LOC": 1,
    "B-PRICE": 2,
    "B-Product": 3,
    "I-LOC": 4,
    "I-Product": 5,
    "O": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "positi

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.535733,0.0,0.0,0.0,0.811111
2,No log,1.535522,0.0,0.0,0.0,0.811111
3,No log,1.535101,0.0,0.0,0.0,0.811111
4,No log,1.534468,0.0,0.0,0.0,0.811111
5,No log,1.533626,0.0,0.0,0.0,0.811111
6,No log,1.532574,0.0,0.0,0.0,0.811111
7,No log,1.531317,0.0,0.0,0.0,0.811111
8,No log,1.529844,0.0,0.0,0.0,0.811111
9,No log,1.528177,0.0,0.0,0.0,0.811111


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassif

Evaluation Results: {'eval_loss': 1.5281773805618286, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8111111111111111, 'eval_runtime': 0.5648, 'eval_samples_per_second': 3.541, 'eval_steps_per_second': 1.77, 'epoch': 9.0}


In [17]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

# 1. Load base model (not pre-trained NER)
model_checkpoint = "xlm-roberta-base"
model = AutoModelForTokenClassification.from_pretrained(
    model_checkpoint,
    num_labels=len(label_list),
    id2label=id_to_label,
    label2id=label_to_id
)

# 2. Updated training arguments
training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/results",
    eval_strategy="epoch",  # or "steps" with eval_steps=500
    learning_rate=2e-5,
    warmup_steps=500,
    per_device_train_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_steps=10,  # Log loss every 10 steps
    log_level="info",
)

# 3. Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics, #this one is added
    tokenizer=tokenizer,
    data_collator=data_collator
)

# 4. Train
trainer.train()


eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--xlm-roberta-base/snapshots/e73636d4f797dec63c3081bb6ed5c7b0bb3f2089/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "B-CONTACT",
    "1": "B-LOC",
    "2": "B-PRICE",
    "3": "B-Product",
    "4": "I-LOC",
    "5": "I-Product",
    "6": "O"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-CONTACT": 0,
    "B-LOC": 1,
    "B-PRICE": 2,
    "B-Product": 3,
    "I-LOC": 4,
    "I-Product": 5,
    "O": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "positi

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,1.785783,0.0,0.0,0.0,0.333333
2,No log,1.785558,0.0,0.0,0.0,0.333333
3,No log,1.785108,0.0,0.0,0.0,0.333333
4,No log,1.784435,0.0,0.0,0.0,0.333333
5,No log,1.783535,0.0,0.0,0.0,0.333333
6,No log,1.782408,0.0,0.0,0.0,0.333333
7,No log,1.781057,0.0,0.0,0.0,0.333333
8,No log,1.779474,0.0,0.0,0.0,0.333333
9,No log,1.777681,0.0,0.0,0.0,0.355556
10,1.811200,1.775658,0.0,0.0,0.0,0.377778


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassif

Evaluation Results: {'eval_loss': 1.775658130645752, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.37777777777777777, 'eval_runtime': 0.7481, 'eval_samples_per_second': 2.673, 'eval_steps_per_second': 1.337, 'epoch': 10.0}


##Evaluate

In [16]:
from datasets import load_metric

metric = load_metric("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = predictions.argmax(axis=-1)

    true_predictions = [
        [id_to_label[p] for (p, l) in zip(pred, label) if l != -100]
        for pred, label in zip(predictions, labels)
    ]
    true_labels = [
        [id_to_label[l] for (p, l) in zip(pred, label) if l != -100]
        for pred, label in zip(predictions, labels)
    ]
    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

trainer.compute_metrics = compute_metrics
trainer.evaluate()


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: ner_tags, tokens. If ner_tags, tokens are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 2
  Batch size = 8


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 1.5281773805618286,
 'eval_precision': 0.0,
 'eval_recall': 0.0,
 'eval_f1': 0.0,
 'eval_accuracy': 0.8111111111111111,
 'eval_runtime': 0.4656,
 'eval_samples_per_second': 4.295,
 'eval_steps_per_second': 2.148,
 'epoch': 9.0}

##Save the model

In [None]:
trainer.save__model("/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/model")

#Task 5
## Inference and Deployment

In [22]:
!pip install shap lime transformers sentencepiece

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=e5e47231a23efc67d362b7be27e718bc666bd030a0d06fa4b12ae84a1e54cc8a
  Stored in directory: /root/.cache/pip/wheels/85/fa/a3/9c2d44c9f3cd77cf4e533b58900b2bf4487f2a17e8ec212a3d
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


##Loading Trained Model and Tokenizer

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

model_path = "/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/results/checkpoint-7"

ner_pipeline = pipeline("ner", model=model_path, tokenizer=model_path)


loading configuration file /content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/results/checkpoint-7/config.json
Model config XLMRobertaConfig {
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "B-CONTACT",
    "1": "B-LOC",
    "2": "B-PRICE",
    "3": "B-Product",
    "4": "I-LOC",
    "5": "I-Product",
    "6": "O"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-CONTACT": 0,
    "B-LOC": 1,
    "B-PRICE": 2,
    "B-Product": 3,
    "I-LOC": 4,
    "I-Product": 5,
    "O": 6
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",


In [1]:
import torch

##SHAP Analysis (Global Explanations)

In [6]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

model_path = "/content/drive/MyDrive/10Acadamy/amharic_e-commerce_data_extractor/results/checkpoint-7"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForTokenClassification.from_pretrained(model_path)

ner_pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")


Device set to use cpu


In [7]:
sample_text = "CHEKICH በ2800 ብር በአዲስ አበባ"

entities = ner_pipe(sample_text)
for entity in entities:
    print(f"{entity['word']} — {entity['entity_group']} — Score: {entity['score']:.2f}")


CHE — LOC — Score: 0.21
በ — LOC — Score: 0.22
2 — LOC — Score: 0.23
በአዲስ — LOC — Score: 0.22


In [8]:
from transformers import pipeline

# 1. Load pipeline with aggregation strategy
ner_pipe = pipeline(
    "ner",
    model=model_path,
    tokenizer=model_path,
    aggregation_strategy="simple"  # Groups subwords
)

# 2. Process text with word boundaries
sample_text = "አዲስ ሞዴል ቤት ማሽን በ 23000 ብር በአዲስ አበባ ሽያጭ ላይ ነው።"
entities = ner_pipe(sample_text)

# 3. Print clean results
for entity in entities:
    print(f"Entity: {entity['word']}")
    print(f"Type: {entity['entity_group']}")
    print(f"Confidence: {entity['score']:.2f}")
    print(f"Position: {entity['start']}-{entity['end']}\n")

Device set to use cpu


Entity: ሞ
Type: LOC
Confidence: 0.22
Position: 4-5

Entity: ል
Type: LOC
Confidence: 0.21
Position: 6-7

Entity: ማ
Type: LOC
Confidence: 0.22
Position: 11-12

Entity: በ
Type: LOC
Confidence: 0.22
Position: 15-16

Entity: 2
Type: LOC
Confidence: 0.23
Position: 17-18

Entity: ብር
Type: LOC
Confidence: 0.22
Position: 23-25

Entity: በአዲስ
Type: LOC
Confidence: 0.22
Position: 26-30

Entity: ያ
Type: LOC
Confidence: 0.22
Position: 36-37

Entity: ጭ
Type: LOC
Confidence: 0.21
Position: 37-38

Entity: ላይ
Type: LOC
Confidence: 0.21
Position: 39-41

Entity: ነው።
Type: LOC
Confidence: 0.24
Position: 42-45



In [10]:
texts = [
    "አዲስ ሞዴል ቤት ማሽን በ 23000 ብር በአዲስ አበባ ሽያጭ ላይ ነው።",
    "የህጻናት ቀሚስ በ1500 ብር አቅምን ላይ ነው።",
    "አስደናቂ ሻምፓን ከCHEKICH በ2800 ብር"
]
