In [None]:
!pip3 install flair

In [None]:
from flair.models import SequenceTagger
from flair.data import Sentence


# the example text to analyse, wrapped in a flair Sentence
sentence = Sentence('George Washington went to Washington.')

# load the tagger registered under the 'ner' model key
tagger = SequenceTagger.load('ner')

# run NER prediction on the sentence
tagger.predict(sentence)

# token-level view: the sentence text with the predicted tags inline
print(sentence.to_tagged_string())

# span-level view: one line per entity span (e.g. a full name) rather than per token
for entity in sentence.get_spans('ner'):
    print(entity)

# dictionary view of the sentence and the 'ner' entities found in it
print(sentence.to_dict(tag_type='ner'))

2021-11-03 01:09:56,101 --------------------------------------------------------------------------------
2021-11-03 01:09:56,102 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub
2021-11-03 01:09:56,104  - The most current version of the model is automatically downloaded from there.
2021-11-03 01:09:56,108  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)
2021-11-03 01:09:56,109 --------------------------------------------------------------------------------
2021-11-03 01:09:56,871 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
None
George <B-PER> Washington <E-PER> went to Washington <S-LOC> .
Span [1,2]: "George Washington"   [− Labels: PER (0.9989)]
Span [5]: "Washington"   [− Labels: LOC (0.9942)]
{'text': 'G

In [None]:
# Splitting sentences using Flair
from flair.models import SequenceTagger
from flair.tokenization import SegtokSentenceSplitter

# load the tagger registered under the 'ner' model key
tagger = SequenceTagger.load('ner')

# a short paragraph containing several sentences
text = "This is a sentence. This is another sentence. I love Berlin."

# break the paragraph into a list of sentences with SegtokSentenceSplitter
splitter = SegtokSentenceSplitter()
sentences = splitter.split(text)

# tag the whole list in a single predict call
tagger.predict(sentences)

# print every sentence together with its predicted labels
for sentence in sentences:
    print(sentence.to_tagged_string())

2021-11-03 01:17:33,548 --------------------------------------------------------------------------------
2021-11-03 01:17:33,554 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub
2021-11-03 01:17:33,555  - The most current version of the model is automatically downloaded from there.
2021-11-03 01:17:33,557  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)
2021-11-03 01:17:33,558 --------------------------------------------------------------------------------
2021-11-03 01:17:34,315 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
{'text': 'This is a sentence.', 'labels': [], 'entities': []}
{'text': 'This is another sentence.', 'labels': [], 'entities': []}
{'text': 'I love Berlin.', 'labels': [], 'entities': [{'te

In [None]:
# test with own data
from flair.models import SequenceTagger
from flair.tokenization import SegtokSentenceSplitter

# Read the whole input document. The encoding is passed explicitly so the result
# does not depend on the platform's default locale.
# NOTE(review): assumes text1-chunk.txt is UTF-8 — confirm against the data file.
with open('text1-chunk.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# split the document into a list of sentences
splitter = SegtokSentenceSplitter()
sentences = splitter.split(text)

# load the 'ner' tagger and predict over all sentences at once
tagger = SequenceTagger.load('ner')
tagger.predict(sentences)

# list of dicts: one to_dict(tag_type='ner') result per sentence
ner_tags = [sentence.to_dict(tag_type='ner') for sentence in sentences]

# For each sentence print its text followed by a blank line, then one line per
# entity in the form "<labels> <entity text>".
for tagged_sentence in ner_tags:
    print(tagged_sentence['text'] + '\n')
    for entity in tagged_sentence['entities']:
        print(f"{entity['labels']} {entity['text']}")

2021-11-03 01:48:21,464 --------------------------------------------------------------------------------
2021-11-03 01:48:21,466 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub
2021-11-03 01:48:21,467  - The most current version of the model is automatically downloaded from there.
2021-11-03 01:48:21,469  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)
2021-11-03 01:48:21,470 --------------------------------------------------------------------------------
2021-11-03 01:48:22,238 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
The Aidonia Treasure is a collection of Mycenaean gold and jewelry returned to Greece in 1996 and thought to have been robbed from a cemetery at Aidonia in the late 1970s.

[MISC (0.6894)]