<a href="https://colab.research.google.com/github/LxYuan0420/nlp/blob/main/notebooks/flair/TUTORIAL_2_TAGGING.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Uncomment the next line to install flair when it is not already available
# in the runtime (e.g. a fresh Colab session).
#!pip install flair

In [2]:
from flair.models import SequenceTagger

# Load the pre-trained sequence tagger registered under the name "ner".
# The resulting `tagger` is reused by the following cells.
tagger = SequenceTagger.load("ner")



Downloading:   0%|          | 0.00/432M [00:00<?, ?B/s]

2022-11-14 16:04:06,358 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4
2022-11-14 16:04:09,600 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


In [3]:
from flair.data import Sentence

# wrap the raw text in a Sentence object so it can be passed to the tagger
sentence = Sentence("George Washington went to Washington")

# run the NER tagger; the predictions are attached to `sentence` itself
tagger.predict(sentence)

# printing the sentence shows its text followed by the predicted entities
print(sentence)

Sentence: "George Washington went to Washington" → ["George Washington"/PER, "Washington"/LOC]


In [4]:
# print every span that carries a "ner" annotation, one per line
for entity in sentence.get_spans("ner"):
    print(entity)

Span[0:2]: "George Washington" → PER (0.9985)
Span[4:5]: "Washington" → LOC (0.9706)


In [5]:
# Walk through every predicted "ner" span and report its details.
for entity in sentence.get_spans('ner'):

    # surface text of the span plus its character offsets in the sentence
    print(f'entity.text is: "{entity.text}"')
    print(f'entity.start_position is: "{entity.start_position}"')
    print(f'entity.end_position is: "{entity.end_position}"')

    # fetch the span's "ner" label once, then report its value and score
    ner_label = entity.get_label("ner")
    print(f'entity "ner"-label value is: "{ner_label.value}"')
    print(f'entity "ner"-label score is: "{ner_label.score}"\n')

entity.text is: "George Washington"
entity.start_position is: "0"
entity.end_position is: "17"
entity "ner"-label value is: "PER"
entity "ner"-label score is: "0.9984738826751709"

entity.text is: "Washington"
entity.start_position is: "26"
entity.end_position is: "36"
entity "ner"-label value is: "LOC"
entity "ner"-label score is: "0.9705727696418762"



In [6]:
# print every "ner" label attached to the sentence, one per line
for label in sentence.get_labels('ner'):
    print(label)

Span[0:2]: "George Washington" → PER (0.9985)
Span[4:5]: "Washington" → LOC (0.9706)


In [7]:
# Report each 'ner' label of the sentence together with the text it annotates.
ner_labels = sentence.get_labels('ner')
for label in ner_labels:
    # the label itself carries the predicted value and its score
    print(f'label.value is: "{label.value}"')
    print(f'label.score is: "{label.score}"')
    # label.data_point is the object the label is attached to; show its text
    annotated = label.data_point
    print(f'the text of label.data_point is: "{annotated.text}"\n')

label.value is: "PER"
label.score is: "0.9984738826751709"
the text of label.data_point is: "George Washington"

label.value is: "LOC"
label.score is: "0.9705727696418762"
the text of label.data_point is: "Washington"



In [8]:
# NOTE(review): MultiTagger may not exist in newer flair releases — confirm
# against the installed version before re-running this cell.
from flair.models import MultiTagger

# load a single tagger object that wraps both the 'pos' and the 'ner' model
# (this rebinds `tagger`, replacing the NER-only tagger loaded earlier)
tagger = MultiTagger.load(['pos', 'ner'])

# make example sentence (`Sentence` was imported in an earlier cell)
sentence = Sentence("George Washington went to Washington.")

# predict with both models
tagger.predict(sentence)

# the printed sentence lists the 'pos' and 'ner' annotations together
print(sentence)



Downloading:   0%|          | 0.00/249M [00:00<?, ?B/s]

2022-11-14 16:08:02,429 loading file /root/.flair/models/pos-english/a9a73f6cd878edce8a0fa518db76f441f1cc49c2525b2b4557af278ec2f0659e.121306ea62993d04cd1978398b68396931a39eb47754c8a06a87f325ea70ac63
2022-11-14 16:08:02,626 SequenceTagger predicts: Dictionary with 53 tags: <unk>, O, UH, ,, VBD, PRP, VB, PRP$, NN, RB, ., DT, JJ, VBP, VBG, IN, CD, NNS, NNP, WRB, VBZ, WDT, CC, TO, MD, VBN, WP, :, RP, EX, JJR, FW, XX, HYPH, POS, RBR, JJS, PDT, NNPS, RBS, AFX, WP$, -LRB-, -RRB-, ``, '', LS, $, SYM, ADD
2022-11-14 16:08:02,999 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4




2022-11-14 16:08:06,524 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
Sentence: "George Washington went to Washington ." → ["George"/NNP, "George Washington"/PER, "Washington"/NNP, "went"/VBD, "to"/IN, "Washington"/NNP, "Washington"/LOC, "."/.]


In [9]:
# Print all 'pos' labels first, then all 'ner' labels, one label per line.
for label_type in ('pos', 'ner'):
    for label in sentence.get_labels(label_type):
        print(label)

Token[0]: "George" → NNP (1.0)
Token[1]: "Washington" → NNP (1.0)
Token[2]: "went" → VBD (1.0)
Token[3]: "to" → IN (0.7628)
Token[4]: "Washington" → NNP (1.0)
Token[5]: "." → . (1.0)
Span[0:2]: "George Washington" → PER (0.9989)
Span[4:5]: "Washington" → LOC (0.9942)


In [12]:
# switch `tagger` to the pre-trained model registered under the name "frame"
tagger = SequenceTagger.load("frame")

# example sentence that uses the verb "return" twice
sentence = Sentence('George returned to Berlin to return his hat.')

# annotate the sentence with the frame tagger
tagger.predict(sentence)

# print each token; tokens that received a label show it next to their text
for token in sentence:
    print(token)



2022-11-14 16:09:46,496 loading file /root/.flair/models/frame-english/c397b8bbddf56e35a7d4b64295712a42a1a9b7ccf430dff76d03c8c7e26b9707.fd7786a36026b383ca73a1413c0a29aa1e67551621b805a0d28ca547636353b9
2022-11-14 16:09:46,891 SequenceTagger predicts: Dictionary with 5196 tags: <unk>, O, _, do.01, get.01, kid.01, know.01, be.01, send.01, seem.01, fold.03, have.03, want.01, say.01, pass.08, play.01, be_like.04, be.03, record.01, hear.01, speak.01, go.04, mean.01, let.01, go.01, see.01, drive.01, pull.01, look.01, start.01, come.01, get.06, pay.01, go.02, miss.01, know.02, know.06, forget.01, ask.02, mail.01, wait.01, be.02, make.02, make.01, think.01, live.01, care.01, smoke.02, put_off.07, mind.01
Token[0]: "George"
Token[1]: "returned" → return.01 (0.9951)
Token[2]: "to"
Token[3]: "Berlin"
Token[4]: "to"
Token[5]: "return" → return.02 (0.6361)
Token[6]: "his"
Token[7]: "hat"
Token[8]: "."


In [14]:
from flair.models import SequenceTagger
from flair.tokenization import SegtokSentenceSplitter

# a text made of several sentences
text = "This is a sentence. This is another sentence. I love Berlin."

# splitter that breaks a text into individual sentences
splitter = SegtokSentenceSplitter()

# split the text; the result is a collection of sentences to tag
sentences = splitter.split(text)

# load the "ner" tagger again and run it on all sentences in one call
tagger = SequenceTagger.load("ner")
tagger.predict(sentences)

# print each sentence with whatever entities were predicted for it
for sentence in sentences:
    print(sentence)

2022-11-14 16:11:47,786 loading file /root/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4




2022-11-14 16:11:49,731 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
Sentence: "This is a sentence ."
Sentence: "This is another sentence ."
Sentence: "I love Berlin ." → ["Berlin"/LOC]


In [15]:
from flair.models import TextClassifier

# load the pre-trained text classifier registered under the name 'sentiment'
classifier = TextClassifier.load('sentiment')

2022-11-14 16:12:49,599 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpehnlqzqa


100%|██████████| 265512723/265512723 [00:11<00:00, 22598145.79B/s]

2022-11-14 16:13:01,704 copying /tmp/tmpehnlqzqa to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2022-11-14 16:13:02,395 removing temp file /tmp/tmpehnlqzqa
2022-11-14 16:13:02,441 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [16]:
# make example sentence (`Sentence` was imported in an earlier cell)
sentence = Sentence("enormously entertaining for moviegoers of any age.")

# run the sentiment classifier; the prediction is attached to `sentence`
classifier.predict(sentence)

# print the sentence together with its predicted label and score
print(sentence)

Sentence: "enormously entertaining for moviegoers of any age ." → POSITIVE (0.9976)


In [17]:
from flair.data import Sentence
from flair.models import RelationExtractor, SequenceTagger

# 1. make example sentence
sentence = Sentence("George was born in Washington")

# 2. load the 'ner-fast' entity tagger and predict entities
tagger = SequenceTagger.load('ner-fast')
tagger.predict(sentence)

# check which entities have been found in the sentence
# (`entities` holds the sentence's 'ner' labels, one per predicted entity)
entities = sentence.get_labels('ner')
for entity in entities:
    print(entity)

# 3. load the pre-trained relation extractor registered as 'relations'
extractor: RelationExtractor = RelationExtractor.load('relations')

# predict relations on the sentence that already carries the 'ner' predictions
extractor.predict(sentence)

# check which relations have been found (stored under the 'relation' label type)
relations = sentence.get_labels('relation')
for relation in relations:
    print(relation)



Downloading:   0%|          | 0.00/257M [00:00<?, ?B/s]

2022-11-14 16:13:12,782 loading file /root/.flair/models/ner-english-fast/4c58e7191ff952c030b82db25b3694b58800b0e722ff15427f527e1631ed6142.e13c7c4664ffe2bbfa8f1f5375bd0dced866b8c1dd7ff89a6d705518abf0a611
2022-11-14 16:13:14,877 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
Span[0:1]: "George" → PER (0.9971)
Span[4:5]: "Washington" → LOC (0.9847)
2022-11-14 16:13:15,424 https://nlp.informatik.hu-berlin.de/resources/models/relations/relations-v11.pt not found in cache, downloading to /tmp/tmpk7gqc3xp


100%|██████████| 266351925/266351925 [00:12<00:00, 21541468.27B/s]

2022-11-14 16:13:28,155 copying /tmp/tmpk7gqc3xp to cache at /root/.flair/models/relations-v11.pt





2022-11-14 16:13:28,916 removing temp file /tmp/tmpk7gqc3xp
2022-11-14 16:13:28,970 loading file /root/.flair/models/relations-v11.pt
Relation[0:1][4:5]: "George -> Washington" → born_in (1.0)


Reference: https://github.com/flairNLP/flair/blob/master/resources/docs/TUTORIAL_2_TAGGING.md