In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch

def predict_pos(sentence_tokens, model_dir, id2label):
    """Predict one label per word of an already word-split sentence.

    The tokenizer and the token-classification model are both loaded from
    ``model_dir`` on every call.

    Args:
        sentence_tokens: Sequence of words (the sentence, already split
            into words); passed to the tokenizer with
            ``is_split_into_words=True``.
        model_dir: Location handed to ``from_pretrained`` for both the
            tokenizer and the model.
        id2label: Mapping from predicted class index (int) to label string.

    Returns:
        A list of ``(word, label)`` tuples in sentence order. Each word
        receives the label predicted at its first sub-token. The input is
        encoded with ``truncation=True``, so words removed by truncation do
        not appear in the result.
    """
    # Load the fine-tuned tokenizer and model.
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForTokenClassification.from_pretrained(model_dir)

    # Sub-word tokenization that keeps track of the originating word.
    encoding = tokenizer(
        sentence_tokens,
        is_split_into_words=True,
        return_tensors="pt",
        truncation=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoding).logits

    # Best class per token position for the single sequence in the batch,
    # converted once to plain Python ints.
    class_ids = logits.argmax(dim=2)[0].tolist()

    # Keep only the first sub-token of every word. Positions whose word id
    # is None (no originating word, e.g. special tokens) are skipped.
    # NOTE(review): ``word_ids()`` presumably requires a fast tokenizer —
    # confirm for the model stored in ``model_dir``.
    tagged_words = []
    previous_word_id = None
    for word_id, class_id in zip(encoding.word_ids(), class_ids):
        starts_new_word = word_id is not None and word_id != previous_word_id
        previous_word_id = word_id
        if not starts_new_word:
            continue
        label = id2label[class_id]
        tagged_words.append((sentence_tokens[word_id], label))

    return tagged_words

# Usage example:
if __name__ == "__main__":
    # Directory holding the fine-tuned model.
    model_dir = "./pos_model"

    # WARNING: id2label must be the same mapping as the one used in TP4/TP5.
    # The tag list below is only an example; list position == class index.
    example_tags = ["ADJ", "NOUN", "VERB", "DET", "ADV", "PUNCT", "ADP", "PRON", "PROPN"]
    id2label = dict(enumerate(example_tags))

    # Whitespace-split the example sentence into words.
    tokens = "Je mange une pomme verte .".split()

    # Print each word right-aligned next to its predicted tag.
    for word, tag in predict_pos(tokens, model_dir, id2label):
        print(f"{word:>10} --> {tag}")
