In [None]:
# Cell 5: Model Interpretability and Inference Test

from transformers import pipeline
import os

# --- 1. Load your fine-tuned model from Google Drive ---
# Make sure this path points to the model you just saved.
# We'll use the mBERT model since it was the last one we trained.
SAVED_MODEL_PATH = os.path.join(GDRIVE_PROJECT_PATH, 'models', 'mbert-cased-ner-finetuned')

# Reset the name first: if the model is missing, later cells must see `None`
# rather than a pipeline left over from an earlier run of this cell.
ner_pipeline = None

# A saved model is a directory of files, so require a directory here
# (a plain file at this path would pass an `exists` check but cannot be loaded).
if not os.path.isdir(SAVED_MODEL_PATH):
    print(f"❌ ERROR: Saved model not found at '{SAVED_MODEL_PATH}'")
    print("Please make sure you have successfully completed the training and saving step.")
else:
    print(f"--- Loading fine-tuned model from: {SAVED_MODEL_PATH} ---")
    # Load the model into a token-classification pipeline for easy inference.
    # The same directory is used for both the model weights and the tokenizer.
    ner_pipeline = pipeline(
        "token-classification",
        model=SAVED_MODEL_PATH,
        tokenizer=SAVED_MODEL_PATH
    )

In [None]:
# --- 2. Define a sample sentence to test ---
# This cell runs the loaded pipeline on two sample sentences and prints the
# per-token predictions, followed by a written interpretation of the results.


def format_prediction(entity):
    """Return the display line for one prediction dict.

    `entity` must provide the keys 'word', 'entity' and 'score'; the score is
    rendered with four decimal places.
    """
    return f"  - Word: {entity['word']}, Entity: {entity['entity']}, Score: {entity['score']:.4f}"


def report_predictions(ner, sentence, sample_number):
    """Run `ner` on `sentence`, print the predictions, and return them.

    `ner` is any callable that takes a string and returns an iterable of
    prediction dicts; `sample_number` is only used in the printed heading.
    """
    print(f"\n--- Running Inference on Sample Sentence {sample_number} ---")
    predictions = ner(sentence)
    print(f"Sentence: '{sentence}'")
    print("Predictions:")
    for entity in predictions:
        print(format_prediction(entity))
    return predictions


# Let's use a sentence that we know has entities.
# From your labeled data: "Laorentou monk strap leather size 39,42,43,44 Price 3500 birr"
test_sentence_1 = "Laorentou monk strap leather size 39,42,43,44 Price 3500 birr"

# Another example with Amharic
test_sentence_2 = "saachi የሚይዘው መጠን 3ሊትር ዋጋ 3999 ብር ነው" # saachi holder size 3liter price 3999 birr

# `ner_pipeline` is created by the model-loading cell only when the saved model
# is found, so look it up defensively instead of assuming it exists.
loaded_pipeline = globals().get("ner_pipeline")

if loaded_pipeline is None:
    print("❌ ERROR: 'ner_pipeline' is not available.")
    print("Please run the model-loading cell first and make sure the saved model was found.")
else:
    predictions_1 = report_predictions(loaded_pipeline, test_sentence_1, 1)
    predictions_2 = report_predictions(loaded_pipeline, test_sentence_2, 2)

    # --- 3. Analysis of the Results ---
    # NOTE: the text below is a fixed narrative; it is not computed from
    # predictions_1 / predictions_2, so re-check it against the output above.
    print("\n" + "="*50)
    print("--- Interpretability Analysis ---")
    print("="*50)
    print("As expected from the 0.0 F1-score during training, the model is not successfully identifying our target entities (PRODUCT, PRICE, LOC).")
    print("The predictions above likely show 'O' (represented as LABEL_0) for most or all tokens.")
    print("This confirms our key finding: the model has learned that predicting 'O' for everything is the safest way to minimize its loss, a classic sign that it has not been trained on enough labeled data to learn the specific patterns of our entities.")
    print("\nThis result, while poor in performance, provides a clear direction for future work: the primary focus must be on increasing the size of the high-quality, manually labeled dataset.")