# Task 3 – Fine-tune a Transformer NER model for EthioMart
Trains a token-classification model (default **XLM-Roberta-base**) on your labelled Amharic e-commerce data.

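*Input*: `ner_labeled.conll` (100+ annotated sentences), read from the notebook's working directory as set by `DATA_PATH` in section 2 – point `DATA_PATH` at `data/ner/ner_labeled.conll` if that is where your export lives.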
*Output*: fine-tuned model + metrics in `models/ner-xlmr/`.

## 0  Setup
Uncomment and run the install cell once, preferably in Colab with a GPU runtime.

In [None]:
    # !pip install -q transformers datasets evaluate seqeval accelerate

## 1  Hyper-parameters (edit here)

In [None]:
# 1. Hyper-parameters (edit here)

# Pretrained checkpoint to fine-tune. Alternatives suggested by the author:
# 'afroxlmr-base', 'bert-tiny-amharic'.
model_ckpt = 'xlm-roberta-base'

# Optimiser learning rate; try 1e-5 – 5e-5.
learning_rate = 5e-5

# Number of passes over the training split.
epochs = 22

# Per-device batch size, used for both training and evaluation.
batch_size = 8

# Sequences are truncated/padded to this many sub-word tokens.
max_length = 128


## 2  Load and parse CoNLL

In [None]:
# 2. Load and parse CoNLL
from pathlib import Path; import re, random
import datasets, evaluate, torch
from transformers import (AutoTokenizer, AutoModelForTokenClassification,
                         TrainingArguments, Trainer, DataCollatorForTokenClassification)

# Labelled CoNLL export, resolved relative to the current working directory.
DATA_PATH = Path('ner_labeled.conll')

# Fail fast with an actionable message instead of a traceback deep in parsing.
if not DATA_PATH.exists():
    raise FileNotFoundError(
        f'{DATA_PATH} not found – export your labels there.'
    )

def read_conll(path):
    """Parse a two-column CoNLL file into per-sentence token/tag lists.

    Every non-blank line is split on runs of tabs/spaces; the first field is
    the token and the second field is its tag. A line with a token but no tag
    is given the tag ``'O'``. One or more blank lines end the current sentence.

    Args:
        path: Location of the file, as a ``str`` or any ``os.PathLike``.

    Returns:
        A list of ``(tokens, tags)`` tuples, one per sentence, where both
        elements are equally long lists of strings. Empty for an empty file.
    """
    sentences = []
    cur_toks, cur_tags = [], []

    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise become part of the first token.
    text = Path(path).read_text(encoding='utf-8-sig')

    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            # Blank line: close the sentence in progress, if any.
            if cur_toks:
                sentences.append((cur_toks, cur_tags))
                cur_toks, cur_tags = [], []
            continue
        parts = re.split(r'[\t ]+', stripped)
        cur_toks.append(parts[0])
        cur_tags.append(parts[1] if len(parts) > 1 else 'O')

    # The file may not end with a blank line; flush the last sentence.
    if cur_toks:
        sentences.append((cur_toks, cur_tags))
    return sentences

# Parse the labelled file and report how many sentences it contains.
examples = read_conll(DATA_PATH)
print('Sentences:', len(examples))

# Tag vocabulary is derived from the data; sorting makes the tag -> id
# assignment deterministic for a given set of tags.
label_list = sorted(set().union(*(tags for _toks, tags in examples)))
id2label = dict(enumerate(label_list))
label2id = {name: idx for idx, name in id2label.items()}


## 3  Dataset & tokenisation

In [None]:
# 3. Dataset & tokenisation
def to_hf(example):
    """Turn one ``(tokens, tags)`` pair into a dataset row.

    String tags are mapped to integer ids through the module-level
    ``label2id``; a tag missing from that mapping raises ``KeyError``.
    """
    words, tag_names = example
    tag_ids = [label2id[name] for name in tag_names]
    return {'tokens': words, 'ner_tags': tag_ids}

# Build a Hugging Face dataset from the parsed sentences.
ds = datasets.Dataset.from_list([to_hf(e) for e in examples])

# Hold out 20 % for evaluation. train_test_split shuffles again by itself and
# draws a fresh random seed unless one is given, so the seed has to be passed
# here as well for the train/eval split to be reproducible across runs.
ds = ds.shuffle(seed=42).train_test_split(test_size=0.2, seed=42)
train_ds, eval_ds = ds['train'], ds['test']

# Tokenizer matching the checkpoint that will be fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

def tokenize(example):
    """Tokenise one pre-split sentence and align its word-level tags.

    Args:
        example: Row with ``'tokens'`` (list of words) and ``'ner_tags'``
            (one integer tag id per word).

    Returns:
        The tokenizer encoding with an added ``'labels'`` list, one entry per
        sub-word position. Only the first sub-word of each word carries the
        word's tag id; special/padding positions and continuation sub-words
        are set to -100. -100 is the value the ``metrics`` function in this
        notebook filters out, and the ignore index of the Hugging Face
        token-classification loss.
    """
    enc = tokenizer(
        example['tokens'],
        is_split_into_words=True,
        truncation=True,
        padding='max_length',
        max_length=max_length,
    )

    word_tags = example['ner_tags']
    label_ids = []
    prev = None

    for wid in enc.word_ids():
        if wid is None:
            # Special or padding token: no word behind it.
            label_ids.append(-100)
        elif wid != prev:
            # First sub-word of a new word carries the word's tag.
            label_ids.append(word_tags[wid])
        else:
            # Continuation sub-word. Copying the tag here would turn a single
            # B-X word into several B-X tokens, which seqeval counts as
            # separate entities, so these positions are masked instead.
            label_ids.append(-100)
        prev = wid

    enc['labels'] = label_ids
    return enc

# Tokenise both splits, dropping the raw 'tokens'/'ner_tags' columns so only
# the fields produced by `tokenize` remain.
train_ds, eval_ds = (
    split.map(tokenize, remove_columns=split.column_names)
    for split in (train_ds, eval_ds)
)


## 4  Fine-tune

In [None]:
# 4. Fine-tune
# Token-classification head sized to the tag set found in the data; the
# id <-> label maps are passed along so predictions can be decoded by name.
model = AutoModelForTokenClassification.from_pretrained(
    model_ckpt,
    num_labels=len(label_list),
    id2label=id2label,
    label2id=label2id,
)

args = TrainingArguments(
    output_dir='models/ner-xlmr',
    num_train_epochs=epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=learning_rate,
    weight_decay=0.01,
    warmup_ratio=0.1,
    # Evaluate and checkpoint once per epoch so the best epoch can be restored.
    eval_strategy='epoch',
    save_strategy='epoch',
    # One checkpoint per epoch would otherwise pile up on disk; keep only the
    # most recent ones (the best checkpoint is still retained because
    # load_best_model_at_end is on).
    save_total_limit=2,
    load_best_model_at_end=True,
    # seqeval's 'overall_f1', with the 'eval_' prefix the Trainer adds.
    metric_for_best_model='eval_overall_f1',
    # Mixed precision only when a CUDA device is present.
    fp16=torch.cuda.is_available(),
    report_to='none',
)

# Pads each batch (inputs and labels) for token classification.
data_collator = DataCollatorForTokenClassification(tokenizer)
# Entity-level precision/recall/F1.
seqeval = evaluate.load('seqeval')

def metrics(p):
    """Score predictions with seqeval, skipping positions labelled -100.

    Args:
        p: A ``(logits, labels)`` pair as handed over by the Trainer. The
            arg-max over the last axis of ``logits`` is taken as the predicted
            tag id for each position.

    Returns:
        Whatever ``seqeval.compute`` returns for the decoded tag sequences.
    """
    logits, labels = p
    pred_ids = logits.argmax(-1)

    true_preds, true_labels = [], []
    for pred_row, label_row in zip(pred_ids, labels):
        # Keep only positions that carry a real label.
        kept = [
            (p_i, l_i)
            for p_i, l_i in zip(pred_row, label_row)
            if l_i != -100
        ]
        true_preds.append([id2label[p_i] for p_i, _ in kept])
        true_labels.append([id2label[l_i] for _, l_i in kept])

    # zero_division=0 suppresses warnings for tags with no predictions.
    return seqeval.compute(
        predictions=true_preds,
        references=true_labels,
        zero_division=0,
    )

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    data_collator=data_collator,
    compute_metrics=metrics,
)

# Run fine-tuning (comment this line out to only build the trainer).
trainer.train()

# During training the Trainer only writes `checkpoint-*` sub-folders. With
# load_best_model_at_end=True the best weights are in memory afterwards, so
# write them (and the tokenizer) to the top of the output directory, which
# makes `from_pretrained('models/ner-xlmr')` work.
trainer.save_model(args.output_dir)
tokenizer.save_pretrained(args.output_dir)

After training, the best checkpoint resides in `models/ner-xlmr/`. Load with:

```python
from transformers import AutoModelForTokenClassification
model = AutoModelForTokenClassification.from_pretrained('models/ner-xlmr')
```