In [1]:
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForTokenClassification
import spacy
from spacy import displacy

In [2]:
# Checkpoint identifier shared by the tokenizer and the token-classification
# model, so both are always loaded from the same source.
MODEL_ID = "eolang/Swahili-NER-BertBase-Cased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

Downloading (…)okenizer_config.json:   0%|          | 0.00/360 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.92M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/709M [00:00<?, ?B/s]

In [3]:
# Sample Swahili sentence that the NER pipeline is run on.
example = "Kwa nini Kenya inageukia mazao ya GMO kukabiliana na ukame"

# Token-classification ("ner") pipeline wrapping the model and tokenizer
# loaded above. NOTE: despite the name, `nlp` is a transformers pipeline,
# not a spaCy Language object.
nlp = pipeline(task="ner", model=model, tokenizer=tokenizer)

In [4]:
# Run the NER pipeline on the sample sentence. The result is a sequence of
# dicts, one per predicted token, with the keys 'entity', 'score', 'index',
# 'word', 'start' and 'end'.
ner_results = nlp(example)

In [5]:
# Show each predicted token with its NER tag and confidence score,
# followed by a 50-character dashed rule.
rule = '-----' * 10
for prediction in ner_results:
    print(*(prediction[field] for field in ("word", "entity", "score")))
    print(rule)

Kwa LABEL_0 0.99923885
--------------------------------------------------
nin LABEL_0 0.99925774
--------------------------------------------------
##i LABEL_0 0.9988446
--------------------------------------------------
Kenya LABEL_5 0.8325896
--------------------------------------------------
ina LABEL_0 0.9995541
--------------------------------------------------
##geu LABEL_0 0.9995402
--------------------------------------------------
##kia LABEL_0 0.9994684
--------------------------------------------------
ma LABEL_0 0.9829077
--------------------------------------------------
##za LABEL_0 0.9738737
--------------------------------------------------
##o LABEL_0 0.97064567
--------------------------------------------------
ya LABEL_0 0.9718624
--------------------------------------------------
GM LABEL_0 0.735248
--------------------------------------------------
##O LABEL_0 0.8975173
--------------------------------------------------
ku LABEL_0 0.99961275
-----------------------

------------

In [37]:
# Inspect which fields the pipeline attaches to each prediction by looking
# at the first result.
first_result = ner_results[0]
print(first_result.keys())

dict_keys(['entity', 'score', 'index', 'word', 'start', 'end'])


In [39]:
# Create a spaCy Doc with one token per NER result.
# The pipeline returns WordPiece fragments: continuation pieces carry a "##"
# prefix (e.g. "nin" + "##i"). Strip that marker and suppress the whitespace
# before a continuation piece so the rendered text reads as the original
# words instead of "nin ##i". Token count and order are unchanged, so token i
# still corresponds to ner_results[i].
pieces = [r['word'] for r in ner_results]
# A bare "##" (nothing after the marker) is kept as-is to avoid empty tokens.
is_continuation = [p.startswith('##') and len(p) > 2 for p in pieces]
words = [p[2:] if cont else p for p, cont in zip(pieces, is_continuation)]
# spaces[i] tells spaCy whether token i is followed by whitespace: False when
# the next piece continues the same word, and False after the final token.
spaces = [not cont for cont in is_continuation[1:]]
if pieces:
    spaces.append(False)
doc = spacy.tokens.Doc(spacy.vocab.Vocab(), words=words, spaces=spaces)

In [40]:
# Attach one single-token entity span per prediction: token i of the Doc gets
# the label predicted for ner_results[i].
# NOTE(review): this labels every token, including LABEL_0, which presumably
# is the non-entity class — confirm against the model card and consider
# filtering it out so only real entities are highlighted.
entities = [
    (position, position + 1, ner_results[position]['entity'])
    for position in range(len(ner_results))
]
doc.ents = [spacy.tokens.Span(doc, *span_args) for span_args in entities]

In [41]:
# Render the entity visualization inline in the notebook. Only the 'ents'
# option (the labels to display) is set; no 'colors' mapping is passed, so
# displaCy chooses the label colors itself.
options = dict(ents=[prediction['entity'] for prediction in ner_results])
displacy.render(doc, style='ent', options=options, jupyter=True)