# Chapter 3 - Embeddings in Flair

This Jupyter notebook is a companion resource that makes it easier to follow the code examples from the book. It covers all practical code snippets and exercises found in Chapter 3 - Embeddings in Flair.

## Understanding word embeddings

### The “king - man ≈ queen - woman” analogy

#### Choosing and initializing the embedding class

In [None]:
from flair.embeddings import WordEmbeddings
# Instantiate Flair's classic word embeddings under the 'crawl' identifier.
# NOTE(review): per the variable name these are fastText vectors; loading
# presumably downloads the model on first use - confirm against Flair docs.
fasttext = WordEmbeddings('crawl')

#### Obtaining the embeddings for words A, B and C and computing the embedding D

In [None]:
from flair.data import Sentence

def compute_embedding_for_D(A, B, C, embedding):
    """Compute the vector of the unknown word D in "A is to B as C is to D".

    The three words are joined with spaces into a single Sentence, embedded
    with ``embedding``, and combined as ``B + C - A``.

    Args:
        A: First word of the analogy.
        B: Second word of the analogy.
        C: Third word of the analogy.
        embedding: Embedding object exposing ``embed(sentence)``.

    Returns:
        The resulting vector converted to a plain list via ``tolist()``.
    """
    triple = Sentence(' '.join([A, B, C]))
    embedding.embed(triple)

    # Tokens 0, 1 and 2 of the sentence are taken as A, B and C.
    vec_a, vec_b, vec_c = (triple[position].embedding for position in range(3))

    return (vec_b + vec_c - vec_a).tolist()

In [None]:
from flair.embeddings import WordEmbeddings
# Re-create the embedding object so this cell can run on its own.
fasttext = WordEmbeddings('crawl')
# Vector for the missing word in "king is to man as queen is to ?",
# i.e. man + queen - king as computed by compute_embedding_for_D.
D = compute_embedding_for_D('king', 'man', 'queen', fasttext)

# Print the raw vector (a plain Python list).
print(D)

#### Obtaining embeddings for all English words in Flair

In [None]:
from flair import datasets
from flair.data import Sentence

def get_embedded_english_vocab(embedding):
    """Embed the vocabulary of the UD_ENGLISH dataset.

    The vocabulary items are joined with spaces into one Sentence, which is
    then embedded in place with ``embedding``.

    Args:
        embedding: Embedding object exposing ``embed(sentence)``.

    Returns:
        The embedded Sentence holding the vocabulary words as tokens.
    """
    words = datasets.UD_ENGLISH().make_vocab_dictionary().get_items()
    vocab_sentence = Sentence(' '.join(words))
    embedding.embed(vocab_sentence)
    return vocab_sentence

In [None]:
from flair.embeddings import WordEmbeddings
# Re-create the embedding object so this cell can run on its own.
fasttext = WordEmbeddings('crawl')

# Embed the whole vocabulary and print the vector of the token at index 6
# as a sample.
print(get_embedded_english_vocab(fasttext)[6].embedding)

#### Finding the closest matching word

In [None]:
from sklearn.metrics.pairwise import cosine_similarity as sim

def find_closest_matching_word(D, vocab, ABC):
    """Return the text of the vocabulary word most similar to vector ``D``.

    Similarity is the cosine similarity between ``D`` and each candidate's
    embedding. On ties the first candidate encountered wins.

    Args:
        D: Target embedding as a flat sequence of floats.
        vocab: Iterable of embedded tokens exposing ``.text`` and
            ``.embedding`` (with ``.tolist()``), e.g. an embedded Sentence.
        ABC: Collection of word texts to exclude from the search (the
            analogy's own input words).

    Returns:
        The text of the best-scoring candidate, or ``None`` when ``vocab``
        contains no candidate outside ``ABC``.
    """
    # Initialised up front so an empty or fully-excluded vocabulary yields
    # None instead of raising UnboundLocalError at the return statement.
    closest_matching_word = None
    # -inf rather than -1 so a candidate scoring exactly -1 can still win.
    max_match = float('-inf')
    for word in vocab:
        # Skip excluded words before paying for a similarity computation.
        if word.text in ABC:
            continue
        match = sim([D], [word.embedding.tolist()])[0][0]
        if match > max_match:
            max_match = match
            closest_matching_word = word.text
    return closest_matching_word

In [None]:
def A_is_to_B_as_C_is_to(A, B, C):
    """Solve the analogy "A is to B as C is to ?" and print the answer.

    Loads the 'crawl' word embeddings, computes the target vector for the
    unknown word, and searches the embedded English vocabulary for the
    closest word other than A, B or C.

    Args:
        A: First word of the analogy.
        B: Second word of the analogy.
        C: Third word of the analogy.

    Returns:
        None. The completed analogy is printed.
    """
    crawl_vectors = WordEmbeddings('crawl')
    target_vector = compute_embedding_for_D(A, B, C, crawl_vectors)
    candidates = get_embedded_english_vocab(crawl_vectors)
    # The input words themselves are excluded from the candidate answers.
    excluded_words = {A, B, C}
    D = find_closest_matching_word(target_vector, candidates, excluded_words)

    print(f'{A} is to {B} as {C} is to {D}')

#### Experimenting with the analogy solver

In [None]:
# The section's title analogy (king - man ≈ queen - woman): asks for the
# word that completes "king is to man as queen is to ?".
A_is_to_B_as_C_is_to("king", "man", "queen")

In [None]:
# Verb-tense analogy: present form -> past form.
A_is_to_B_as_C_is_to("do", "did", "go")

In [None]:
# Product -> profession analogy.
A_is_to_B_as_C_is_to("bread", "baker", "meat")

In [None]:
# Capital city -> country analogy.
A_is_to_B_as_C_is_to("London", "England", "Ljubljana")

In [None]:
# Opposites analogy.
A_is_to_B_as_C_is_to("life", "death", "beginning")

In [None]:
# Adjective -> comparative form analogy.
A_is_to_B_as_C_is_to("big", "bigger", "small")

In [None]:
# Gendered profession-noun analogy.
A_is_to_B_as_C_is_to("man", "actor", "woman")

## Classic word embeddings in Flair

In [None]:
from flair.data import Sentence
from flair.embeddings import WordEmbeddings

# Embed a sentence in which the word "one" appears twice (first and last
# token) using the classic 'crawl' word embeddings.
embedding = WordEmbeddings('crawl')
sentence = Sentence("one two three one")
embedding.embed(sentence)

# Print the embedding vector attached to each token.
for token in sentence:
    print(token.embedding)

In [None]:
# First and fourth tokens of the sentence embedded in the previous cell;
# both are the word "one".
token1 = sentence[0]
token4 = sentence[3]

# Element-wise equality of the two vectors.
# NOTE(review): expected to print True for static (non-contextual) word
# embeddings - confirm by running.
print(token1.embedding.tolist() == token4.embedding.tolist())

## Flair Embeddings

### Understanding the contextuality of Flair embeddings

In [None]:
from flair.data import Sentence
from flair.embeddings import FlairEmbeddings

# Two sentences that share the same first word ("nice") but differ in the
# word that follows it.
embedding = FlairEmbeddings('news-forward')
s1 = Sentence("nice shirt")
s2 = Sentence("nice pants")

embedding.embed(s1)
embedding.embed(s2)

# Compare the embeddings of "nice" (token 0) across the two sentences.
# NOTE(review): a forward model presumably only sees the text preceding the
# word, which is identical here - confirm the printed result by running.
print(s1[0].embedding.tolist() == s2[0].embedding.tolist())

In [None]:
# Same experiment, but now "nice" (token 1) is preceded by a different word
# in each sentence. Reuses the `embedding` object from the previous cell.
s1 = Sentence("very nice shirt")
s2 = Sentence("pretty nice pants")

embedding.embed(s1)
embedding.embed(s2)

# Compare the embeddings of "nice" across the two sentences.
# NOTE(review): presumably differs because the preceding context differs -
# confirm by running.
print(s1[1].embedding.tolist() == s2[1].embedding.tolist())

## Character level sequence modeling in Flair embeddings

In [None]:
from sklearn.metrics.pairwise import cosine_similarity as sim

# A correctly spelled word versus a misspelled variant ("potato" vs
# "potatoo") in otherwise identical sentences.
s1 = Sentence("eating potato")
s2 = Sentence("eating potatoo")

embedding = FlairEmbeddings('news-forward')
embedding.embed(s1)
embedding.embed(s2)
# Embeddings of the second token of each sentence as plain lists.
e1 = s1[1].embedding.tolist()
e2 = s2[1].embedding.tolist()

# Cosine similarity between the two vectors (printed as a 1x1 matrix).
print(sim([e1], [e2])) 

## Stacked embeddings

In [None]:
from flair.embeddings import FlairEmbeddings, WordEmbeddings
from flair.embeddings import StackedEmbeddings

# One classic word embedding plus forward and backward Flair embeddings.
glove = WordEmbeddings('glove')
news_fw = FlairEmbeddings('news-forward')
news_bw = FlairEmbeddings('news-backward')

combined_embeddings_list = [glove, news_fw, news_bw]

# Combine the three embeddings into a single stacked embedding object.
stack = StackedEmbeddings(combined_embeddings_list)

## Document embeddings

In [None]:
from flair.data import Sentence
from flair.embeddings import TransformerDocumentEmbeddings

# Document-level embedding backed by the 'bert-base-uncased' transformer.
embedding = TransformerDocumentEmbeddings('bert-base-uncased')

sentence = Sentence('Example sentence .')
embedding.embed(sentence)

# The embedding is read from the sentence itself rather than from its
# individual tokens.
print(sentence.embedding)