In [1]:
import torch

Two types of objects are central to this library: `Sentence` and `Token`.

A `Sentence` holds a textual sentence and is essentially a list of `Token` objects.

In [2]:
from flair.data import Sentence

# Build a Sentence directly from raw text; it is tokenized on construction
# (the repr displayed by this cell reports the resulting token count).
sentence_string = "Ala has a cat."
sentence = Sentence(sentence_string)

# A bare last expression makes the notebook display the Sentence's repr.
sentence

Sentence: "Ala has a cat." - 4 Tokens

In [3]:
# A freshly created Sentence has neither labels nor an embedding assigned:
# the first printed line is the (empty) list of labels, the second one is the
# (empty) sentence embedding. Note that the embedding is a PyTorch tensor
# even when nothing has been assigned yet.
print(sentence.labels, sentence.embedding, sep="\n")

[]
tensor([])


In [4]:
# The tokenized sentence is a list in which every element is a Token object.
tokens = sentence.tokens
print(tokens)
print(type(tokens[0]))

# Just like the sentence itself, the tokens have no embeddings assigned yet.
for token in sentence:
    print(token.embedding)

[Token: 1 Ala, Token: 2 has, Token: 3 a, Token: 4 cat.]
<class 'flair.data.Token'>
tensor([])
tensor([])
tensor([])
tensor([])


## Word Embeddings

Flair provides well-known word embeddings such as GloVe, word2vec, and fastText, together with the newest embeddings based on the transformer architecture.<br>
Furthermore, they can be stacked together, including with the signature Flair embeddings provided by the library.

In [5]:
# example
from flair.embeddings import StackedEmbeddings
from flair.embeddings import FlairEmbeddings
from flair.embeddings import ELMoEmbeddings
from flair.embeddings import WordEmbeddings



# A stack concatenates the vectors produced by its member embeddings,
# which gives a richer representation of each word than any single member.

# Stack 1: GloVe word vectors plus the forward/backward "fast" Flair models.
glove_flair_members = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast'),
]
stacked_embeddings1 = StackedEmbeddings(embeddings=glove_flair_members)

# Stack 2: the same Flair pair, with ELMo as the first member instead of GloVe.
elmo_flair_members = [
    ELMoEmbeddings(),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast'),
]
stacked_embeddings2 = StackedEmbeddings(embeddings=elmo_flair_members)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


In [6]:
# The individual embedding layers held by the stack, listed in the order
# in which they were passed to StackedEmbeddings.
stacked_embeddings1.embeddings

[WordEmbeddings('glove'), FlairEmbeddings(
   (lm): LanguageModel(
     (drop): Dropout(p=0.25, inplace=False)
     (encoder): Embedding(275, 100)
     (rnn): LSTM(100, 1024)
     (decoder): Linear(in_features=1024, out_features=275, bias=True)
   )
 ), FlairEmbeddings(
   (lm): LanguageModel(
     (drop): Dropout(p=0.25, inplace=False)
     (encoder): Embedding(275, 100)
     (rnn): LSTM(100, 1024)
     (decoder): Linear(in_features=1024, out_features=275, bias=True)
   )
 )]

In [7]:
# Create a fresh Sentence object so the next cell starts from tokens that
# have no embeddings attached yet.
sentence_string = "Ala has a cat."
sentence = Sentence(sentence_string)

In [8]:
# embed() works in place: it attaches an embedding to every token of `sentence`.
stacked_embeddings1.embed(sentence)
for token in sentence:
    print(token.embedding)
    print(type(token.embedding))
    print(token.embedding.shape, "\n")

# Every token now carries one concatenated torch.Tensor.
# stacked_embeddings1 is GloVe + two "fast" Flair models (there is no ELMo in this stack), so
# stacked_size = glove_size(100) + 2 * flair_fast_size(1024) = 2148, matching the printed shape.

tensor([-1.1226e+00, -1.1322e+00, -4.3761e-01,  ..., -2.3225e-08,
         1.3534e-05,  2.1131e-02])
<class 'torch.Tensor'>
torch.Size([2148]) 

tensor([ 9.3736e-02,  5.6152e-01,  4.8364e-01,  ..., -7.1311e-08,
         3.2900e-05,  1.3822e-03])
<class 'torch.Tensor'>
torch.Size([2148]) 

tensor([-2.7086e-01,  4.4006e-02, -2.0260e-02,  ..., -1.5215e-08,
         1.4911e-04,  1.0475e-01])
<class 'torch.Tensor'>
torch.Size([2148]) 

tensor([ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -2.0251e-08,
        -7.9864e-04,  2.4168e-02])
<class 'torch.Tensor'>
torch.Size([2148]) 



## Phrase Embeddings

We can create a phrase embedding for a given sentence by applying some arithmetic operation (such as taking the mean) to the obtained word embeddings,

In [9]:
# Rebuild the same GloVe + fast Flair stack that was used for the word embeddings.
stacked_embeddings1 = StackedEmbeddings(
    embeddings = [
        WordEmbeddings('glove'),
        FlairEmbeddings('news-forward-fast'),
        FlairEmbeddings('news-backward-fast')
    ]
)

# Attach one stacked embedding to every token of the sentence (in place).
stacked_embeddings1.embed(sentence)

# torch.stack adds the new leading dimension itself, so the per-token vectors
# become a (num_tokens, embedding_dim) matrix without the manual
# unsqueeze(0) + torch.cat; averaging over dim 0 yields one phrase vector.
token_matrix = torch.stack([token.embedding for token in sentence.tokens])
token_matrix.mean(dim=0)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


tensor([-3.2493e-01, -1.3167e-01,  6.4425e-03,  ..., -3.2500e-08,
        -1.5077e-04,  3.7857e-02])

or employ one of the document embedding classes from the `flair.embeddings` module.

In [10]:
from flair.embeddings import DocumentLSTMEmbeddings
# or
from flair.embeddings import DocumentMeanEmbeddings
# or
from flair.embeddings import DocumentPoolEmbeddings
# or
from flair.embeddings import DocumentRNNEmbeddings

In [11]:
# example: pool the token embeddings of a GloVe + fast Flair combination
# into a single embedding for the whole document
pooled_members = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast'),
]
document_embeddings = DocumentPoolEmbeddings(embeddings=pooled_members)

In [12]:
# embed() stores the pooled document embedding on the Sentence itself.
document_embeddings.embed(sentence)
sentence.embedding
# The sentence now has an embedding assigned. The displayed values match the
# manual mean over the token embeddings computed earlier in the notebook
# (DocumentPoolEmbeddings was created with its default settings); here the
# tensor additionally carries a grad_fn.

tensor([-3.2493e-01, -1.3167e-01,  6.4425e-03,  ..., -3.2500e-08,
        -1.5077e-04,  3.7857e-02], grad_fn=<CatBackward>)