In [3]:
import torch
from transformers import BertTokenizer, BertForTokenClassification  # Change here
import pandas as pd
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_path = 'model_NER'

# Class index -> tag name; the position in the list is the class index.
id2tag = dict(enumerate(['O', 'B-LOG', 'I-LOG']))

# Tokenizer and token-classification model are loaded from the same checkpoint.
tokenizer = BertTokenizer.from_pretrained(model_path)
model = BertForTokenClassification.from_pretrained(model_path)

# Put the model in evaluation mode before running inference.
model.eval()

def predict(text):
    """Tag every token of ``text`` with a label from ``id2tag``.

    The text is encoded with the module-level ``tokenizer`` (truncated to at
    most 512 tokens) and passed through the module-level ``model`` with
    gradient tracking disabled. For each token the highest-scoring class
    index is mapped to its tag name through ``id2tag``.

    As a post-processing step, any token starting with ``##`` that directly
    follows a token tagged ``B-LOG`` or ``I-LOG`` is re-tagged ``I-LOG``.
    The pass runs left to right, so a run of ``##`` pieces after an entity
    token is relabelled as a whole.

    Args:
        text: Input passed to the tokenizer.
            NOTE(review): the ``squeeze()`` calls assume a single input
            string (batch of one) -- confirm before passing a list.

    Returns:
        A list of ``(token, label)`` tuples in token order.
    """
    encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)

    with torch.no_grad():
        logits = model(**encoded).logits

    token_ids = encoded["input_ids"].squeeze().tolist()
    tokens = tokenizer.convert_ids_to_tokens(token_ids)

    # Highest-scoring class per token, taken along the last (class) axis.
    class_ids = logits.argmax(dim=2).squeeze().tolist()
    predicted_labels = [id2tag[class_id] for class_id in class_ids]

    entity_tags = ('B-LOG', 'I-LOG')
    for position, token in enumerate(tokens[1:], start=1):
        if not token.startswith('##'):
            continue
        # Reads the already-updated previous label so relabelling chains
        # through consecutive '##' pieces.
        if predicted_labels[position - 1] in entity_tags:
            predicted_labels[position] = 'I-LOG'

    return list(zip(tokens, predicted_labels))

if __name__ == "__main__":
    # Demo sentence; adjacent literals are concatenated into one string.
    text = (
        'K2, known for its challenging climbs and technical difficulties, '
        'is often considered a more daunting ascent than Everest, '
        'despite Everest being the worlds highest peak.'
    )

    # `prediction` is a list of (token, label) pairs; a later cell reuses it.
    prediction = predict(text)
    print(prediction)

[('[CLS]', 'O'), ('k', 'B-LOG'), ('##2', 'I-LOG'), (',', 'O'), ('known', 'O'), ('for', 'O'), ('its', 'O'), ('challenging', 'O'), ('climbs', 'O'), ('and', 'O'), ('technical', 'O'), ('difficulties', 'O'), (',', 'O'), ('is', 'O'), ('often', 'O'), ('considered', 'O'), ('a', 'O'), ('more', 'O'), ('da', 'O'), ('##unt', 'O'), ('##ing', 'O'), ('ascent', 'O'), ('than', 'O'), ('everest', 'B-LOG'), (',', 'O'), ('despite', 'O'), ('everest', 'B-LOG'), ('being', 'O'), ('the', 'O'), ('worlds', 'O'), ('highest', 'O'), ('peak', 'O'), ('.', 'O'), ('[SEP]', 'O')]


In [4]:
    # Tabulate the (token, label) pairs from the previous cell, one per row.
    df = pd.DataFrame(data=prediction, columns=['Token', 'Predicted Label'])
    print(df)

           Token Predicted Label
0          [CLS]               O
1              k           B-LOG
2            ##2           I-LOG
3              ,               O
4          known               O
5            for               O
6            its               O
7    challenging               O
8         climbs               O
9            and               O
10     technical               O
11  difficulties               O
12             ,               O
13            is               O
14         often               O
15    considered               O
16             a               O
17          more               O
18            da               O
19         ##unt               O
20         ##ing               O
21        ascent               O
22          than               O
23       everest           B-LOG
24             ,               O
25       despite               O
26       everest           B-LOG
27         being               O
28           the               O
29        