In [1]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification

In [30]:
#tokenizer = AutoTokenizer.from_pretrained("eolang/Swahili-NER-Distilbert-Cased")
tokenizer = AutoTokenizer.from_pretrained("TUS/Fill-Mask-V2", truncation=True)
model = AutoModelForTokenClassification.from_pretrained("eolang/Swahili-NER-Distilbert-Cased")

In [31]:
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
example = "Kwa nini Kenya inageukia mazao ya GMO kukabiliana na ukame"

In [32]:
ner_results = nlp(example)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


TypeError: forward() got an unexpected keyword argument 'token_type_ids'

In [16]:
# print word and NER tag
for result in ner_results:
    print(result["word"], result["entity"], result["score"])
    print('-----' * 10)

Kwa LABEL_0 0.9981812
--------------------------------------------------
nin LABEL_0 0.99720913
--------------------------------------------------
##i LABEL_0 0.99845517
--------------------------------------------------
Kenya LABEL_5 0.992393
--------------------------------------------------
ina LABEL_0 0.9994105
--------------------------------------------------
##geu LABEL_0 0.9993168
--------------------------------------------------
##kia LABEL_0 0.99928516
--------------------------------------------------
ma LABEL_0 0.9938711
--------------------------------------------------
##za LABEL_0 0.99077773
--------------------------------------------------
##o LABEL_0 0.99350077
--------------------------------------------------
ya LABEL_0 0.9956169
--------------------------------------------------
GM LABEL_3 0.7276275
--------------------------------------------------
##O LABEL_3 0.63648194
--------------------------------------------------
ku LABEL_0 0.99914384
--------------------

In [17]:
#print all keys
print(ner_results[0].keys())


dict_keys(['entity', 'score', 'index', 'word', 'start', 'end'])


In [20]:
import spacy
from spacy import displacy

In [21]:
# create a Spacy Doc object with the words
words = [r['word'] for r in ner_results]
doc = spacy.tokens.Doc(spacy.vocab.Vocab(), words=words)

In [22]:
# add entities to the Doc object
entities = [(i, i+1, r['entity']) for i, r in enumerate(ner_results)]
doc.ents = [spacy.tokens.Span(doc, start, end, label) for start, end, label in entities]


In [24]:
# create the visualization with displacy
options = {'ents': [r['entity'] for r in ner_results]}
html = displacy.render(doc, style='ent', options=options)

In [26]:
# save the HTML code to a file
with open('ner_visualization.html', 'w', encoding='utf-8') as f:
    f.write(str(html))