# <center><font color = '#DF9166' size = 20 center> **Model Interpretation**</font></center>



## <font color = '#DF9166' size=6>**Table of Contents**</font><a class = 'anchor' id = 'introduction'></a>

1. [**Import Libraries**](#import)
2. [**Data Loading**](#data_loading)
3. [**Model and Tokenizer Loading**](#model_loading)
4. [**Model Comparison**](#model_comparison)

## <font color = '#DF9166' size=6>**Import Libraries**</font><a class = 'anchor' id = 'import'></a>

In [2]:
import os
import sys
import random

from IPython.display import Image
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

In [None]:
# Put the parent of the current working directory on the import path so the
# project-local `scripts` package can be imported from this notebook.
sys.path.append(os.path.abspath(os.pardir))
from scripts.train import load_conll_file
# NOTE(review): the wildcard import hides the origin of the helpers used below
# (align_labels_and_predictions, classification_report, per_entity_report,
# measure_inference_time, get_model_size). They are presumably exported by
# scripts.compare - confirm and switch to explicit imports.
from scripts.compare import *

## <font color = '#DF9166' size=6>**Data Loading**</font><a class = 'anchor' id = 'data_loading'></a>

In [5]:
# Location of the labeled CoNLL file. The default is the original Colab/Drive
# path; set the LABELED_DATA_PATH environment variable to point the notebook at
# another copy of the data without editing this cell.
DATA_PATH = os.environ.get(
    "LABELED_DATA_PATH",
    "/content/drive/MyDrive/10 acadamy/W5 Challenge/data/processed/labeled_data.conll",
)
# Seed handed to load_conll_file together with DATA_PATH
# (presumably controls the train/validation/test split - confirm in scripts.train).
SEED = 42

In [6]:
# Load the labeled CoNLL file through the project helper, passing the configured
# path and seed. The captured output shows a DatasetDict with train / test /
# validation splits, each exposing 'tokens' and 'ner_tags' columns.
# NOTE(review): how SEED is used inside load_conll_file is not visible here - confirm.
dataset = load_conll_file(DATA_PATH, SEED)
# Bare last expression so the notebook displays the dataset summary.
dataset

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 1850
    })
    test: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 232
    })
    validation: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 231
    })
})

In [7]:
# Pull the token sequences and their NER tags out of the held-out test split;
# every evaluation cell below works on these two columns.
texts, labels = dataset["test"]["tokens"], dataset["test"]["ner_tags"]

## <font color = '#DF9166' size=6>**Model and Tokenizer Loading**</font><a class = 'anchor' id = 'model_loading'></a>

In [9]:
DSTLBERT_MODEL_NAME = "Naod-Demissie/distlbert-amh-telegram-trained-merged"
BERT_MODEL_NAME = "Naod-Demissie/bert-amh-telegram-trained-merged"


def load_ner_pipeline(model_name):
    """Load the tokenizer, model and NER pipeline for one checkpoint.

    Both checkpoints are loaded through this single helper so they are
    guaranteed to be configured identically for the comparison.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the token-classification model.

    Returns:
        A ``(tokenizer, model, ner_pipeline)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name)
    # aggregation_strategy="none" is kept from the original cell so that the
    # pipeline output is not grouped into entity spans.
    ner_pipeline = pipeline(
        "ner", model=model, tokenizer=tokenizer, aggregation_strategy="none"
    )
    return tokenizer, model, ner_pipeline


# Same load order as before: BERT first, then the distilled model.
bert_tokenizer, bert_model, bert_pipeline = load_ner_pipeline(BERT_MODEL_NAME)
dstlbert_tokenizer, dstlbert_model, dstlbert_pipeline = load_ner_pipeline(
    DSTLBERT_MODEL_NAME
)

Device set to use cuda:0
Device set to use cuda:0


## <font color = '#DF9166' size=6>**Model Comparison**</font><a class = 'anchor' id = 'model_comparison'></a>

In [17]:
# Evaluate the BERT model on the test split.
y_true_bert, y_pred_bert = align_labels_and_predictions(
    texts, labels, bert_tokenizer, bert_pipeline
)

# Flatten the per-sentence tag lists in one linear pass. The previous
# `sum(lists, [])` re-copied the growing accumulator for every sentence, which is
# quadratic in the total number of tags; the flattened result is identical.
# NOTE(review): if classification_report is seqeval's, it also accepts the nested
# per-sentence lists directly, which keeps sentence boundaries - confirm.
flat_true_bert = [tag for sentence in y_true_bert for tag in sentence]
flat_pred_bert = [tag for sentence in y_pred_bert for tag in sentence]

print("Bert Model Classification Report:")
print(classification_report(flat_true_bert, flat_pred_bert))

Bert Model Classification Report:
              precision    recall  f1-score   support

         LOC       0.05      0.09      0.06       629
       PRICE       0.00      0.00      0.00       254
     PRODUCT       0.01      0.36      0.02       476

   micro avg       0.01      0.17      0.02      1359
   macro avg       0.02      0.15      0.03      1359
weighted avg       0.02      0.17      0.04      1359



In [12]:
# Evaluate the DistilBERT model on the test split.
y_true_dstlbert, y_pred_dstlbert = align_labels_and_predictions(
    texts, labels, dstlbert_tokenizer, dstlbert_pipeline
)

# Flatten the per-sentence tag lists in one linear pass. The previous
# `sum(lists, [])` re-copied the growing accumulator for every sentence, which is
# quadratic in the total number of tags; the flattened result is identical.
# NOTE(review): if classification_report is seqeval's, it also accepts the nested
# per-sentence lists directly, which keeps sentence boundaries - confirm.
flat_true_dstlbert = [tag for sentence in y_true_dstlbert for tag in sentence]
flat_pred_dstlbert = [tag for sentence in y_pred_dstlbert for tag in sentence]

print("dstlbert Model Classification Report:")
print(classification_report(flat_true_dstlbert, flat_pred_dstlbert))

dstlbert Model Classification Report:
              precision    recall  f1-score   support

         LOC       0.01      0.05      0.02       622
       PRICE       0.00      0.00      0.00       254
     PRODUCT       0.01      0.29      0.02       463

   micro avg       0.01      0.13      0.02      1339
   macro avg       0.01      0.12      0.01      1339
weighted avg       0.01      0.13      0.02      1339



### Per-Entity Performance Comparison

In [14]:
# Per-entity metrics for both models, computed on the flattened tag sequences.
# The flattening uses a linear comprehension instead of `sum(lists, [])`, which
# copies the accumulator on every step and is quadratic in the number of tags.
print(
    "Bert Model Per-Entity Performance:",
    per_entity_report(
        [tag for sentence in y_true_bert for tag in sentence],
        [tag for sentence in y_pred_bert for tag in sentence],
    ),
)
print(
    "Distlbert Model Per-Entity Performance:",
    per_entity_report(
        [tag for sentence in y_true_dstlbert for tag in sentence],
        [tag for sentence in y_pred_dstlbert for tag in sentence],
    ),
)

Bert Model Per-Entity Performance: {'LOC': {'Precision': 0.038430089942763694, 'Recall': 0.07556270096463022, 'F1-score': 0.05094850948509484}, 'PRICE': {'Precision': 0.0, 'Recall': 0.0, 'F1-score': 0.0}, 'PRODUCT': {'Precision': 0.008504734905603474, 'Recall': 0.3045356371490281, 'F1-score': 0.01654735359699566}, 'micro avg': {'Precision': 0.010549943883277216, 'Recall': 0.140403286034354, 'F1-score': 0.01962524140090819}, 'macro avg': {'Precision': 0.015644941616122388, 'Recall': 0.1266994460378861, 'F1-score': 0.0224986210273635}, 'weighted avg': {'Precision': 0.02079253786832967, 'Recall': 0.140403286034354, 'F1-score': 0.02938864646388199}}
Distlbert Model Per-Entity Performance: {'LOC': {'Precision': 0.014385353095030515, 'Recall': 0.05305466237942122, 'F1-score': 0.02263374485596708}, 'PRICE': {'Precision': 0.0, 'Recall': 0.0, 'F1-score': 0.0}, 'PRODUCT': {'Precision': 0.008276533592989289, 'Recall': 0.2937365010799136, 'F1-score': 0.01609943770346256}, 'micro avg': {'Precision'

### Inference Time Comparison

In [13]:
# Time both pipelines over the same test sentences, BERT first.
# NOTE(review): the printed unit assumes measure_inference_time returns an
# average in seconds per sentence - confirm in scripts.compare.
time_bert, time_dstlbert = (
    measure_inference_time(bert_pipeline, texts),
    measure_inference_time(dstlbert_pipeline, texts),
)

print(f"Bert Model Avg Inference Time: {time_bert:.4f} sec/sentence")
print(f"Distlbert Model Avg Inference Time: {time_dstlbert:.4f} sec/sentence")

Bert Model Avg Inference Time: 0.8243 sec/sentence
Distlbert Model Avg Inference Time: 0.4932 sec/sentence


### Model Size Comparison

In [15]:
# Look up the size of each checkpoint by its model name, BERT first.
# NOTE(review): the printed unit assumes get_model_size returns megabytes -
# confirm in scripts.compare.
size_bert, size_dstlbert = (
    get_model_size(BERT_MODEL_NAME),
    get_model_size(DSTLBERT_MODEL_NAME),
)

print(f"Bert Model Size: {size_bert:.2f} MB")
print(f"Distlbert Model Size: {size_dstlbert:.2f} MB")

Bert Model Size: 424.88 MB
Distlbert Model Size: 262.64 MB
