# Word Embedding Translator

## Libraries

In [1]:
import numpy as np
import pandas as pd
import itertools as it

from gensim.models import KeyedVectors

from sklearn.metrics.pairwise import cosine_similarity

## 1 - Loading data
Loading the models and sentences used.
- Models: https://fasttext.cc/docs/en/crawl-vectors.html
- Sentences: https://github.com/alexa/massive

In [2]:
def load_files(model_path, sentences_path, limit = None):
    '''
    Load one FastText model and the Amazon Massive sentences that go with it.

    Params:
    - model_path: path to a FastText vectors file, e.g., Datasets/FastText/cc.en.300.vec
    - sentences_path: path to an Amazon Massive JSON Lines file, e.g., Datasets/Amazon_Massive/en-US.jsonl
    - limit: forwarded to KeyedVectors.load_word2vec_format as its limit argument; set it
      (e.g., 5000) in case you have low computer power, or leave None to load everything

    Return:
    Tuple containing the language model and the 'utt' column of its corresponding sentences file
    '''

    word_vectors = KeyedVectors.load_word2vec_format(
        model_path,
        limit = limit,
        unicode_errors = 'replace',
    )

    # Only the utterance text ('utt') is used by the rest of the notebook.
    massive_frame = pd.read_json(sentences_path, lines = True)

    return word_vectors, massive_frame['utt']

Defining data path.

In [3]:
# Root folders (relative paths) of the two datasets used in this notebook.
MASSIVE_PATH = 'Datasets/Amazon_Massive/'
FASTTEXT_PATH = 'Datasets/FastText/'

In [4]:
# language code -> [ FastText vectors file, Amazon Massive sentences file ]
PATHS = {
    'en': [ f'{ FASTTEXT_PATH }cc.en.300.vec', f'{ MASSIVE_PATH }en-US.jsonl' ],
    'pt': [ f'{ FASTTEXT_PATH }cc.pt.300.vec', f'{ MASSIVE_PATH }pt-PT.jsonl' ],
    'es': [ f'{ FASTTEXT_PATH }cc.es.300.vec', f'{ MASSIVE_PATH }es-ES.jsonl' ],
}

# View over the language codes, in the insertion order of PATHS.
LANGUAGES = PATHS.keys()

**Note**: the cell below takes approximately 5 to 6 minutes per model.

In [5]:
# Both dictionaries are keyed by language code.
MODELS = {}
SENTENCES = {}

for language, (model_path, sentences_path) in PATHS.items():
    print("Loading", model_path)
    MODELS[language], SENTENCES[language] = load_files(model_path, sentences_path)
    print("Finished loading", model_path)

print("\nAll models and sentences are now loaded!")

Loading Datasets/FastText/cc.en.300.vec
Finished loading Datasets/FastText/cc.en.300.vec
Loading Datasets/FastText/cc.pt.300.vec
Finished loading Datasets/FastText/cc.pt.300.vec
Loading Datasets/FastText/cc.es.300.vec
Finished loading Datasets/FastText/cc.es.300.vec

All models and sentences are now loaded!


## 2 - Preparing data

In [6]:
# One list of samples per language; every language gets its own fresh list.
SAMPLES = { language: [] for language in LANGUAGES }

In [7]:
# Rebuild SAMPLES from scratch so that re-running this cell does not append duplicated samples.
SAMPLES = { key: [] for key in LANGUAGES }

# All sentence files must have the same length, so any language can drive the range function.
# The lengths are also printed in the cell below.
n_sentences = len(SENTENCES['pt'])
assert all(len(sent) == n_sentences for sent in SENTENCES.values()), "sentence files must have the same length"

for idx in range(n_sentences):

    actual_sentence = { key: [] for key in LANGUAGES }

    try:
        for lang, sent in SENTENCES.items():
            for word in sent[idx].split(' '):
                actual_sentence[lang].append(MODELS[lang][word])

    except KeyError:
        # At least one word is missing from one of the models: drop this sentence for
        # every language, so that SAMPLES keeps the same sentences at the same positions.
        continue

    for key, value in actual_sentence.items():
        # A sample is [original sentence, sum of the vectors of its words].
        SAMPLES[key].append([SENTENCES[key][idx], sum(value)])

In [8]:
# Report, per language, how many sentences could be fully embedded by its model.
for key in SENTENCES:
    # SIZE_SAMPLES keeps the count of the last language after the loop; it is read by the split cell.
    SIZE_SAMPLES = len(SAMPLES[key])
    total_sentences = len(SENTENCES[key])
    print(f'Total sentences in { key } file: { total_sentences } -> Model { key } samples: { SIZE_SAMPLES } ({ SIZE_SAMPLES / total_sentences * 100:.2f}%)')

Total sentences in en file: 16521 -> Model en samples: 15055 (91.13%)
Total sentences in pt file: 16521 -> Model pt samples: 15055 (91.13%)
Total sentences in es file: 16521 -> Model es samples: 15055 (91.13%)


Splitting into train and test.

In [9]:
# Fraction of the samples used for training; the remainder becomes the test set.
TRAIN_FRACTION = 0.7

# Compute the split index from SAMPLES itself instead of relying on a variable left over
# from the reporting loop. All languages must hold the same number of samples, because the
# same index is used to split every language.
sample_counts = { len(samples) for samples in SAMPLES.values() }
assert len(sample_counts) == 1, "all languages must have the same number of samples"

# Despite its name, SPLIT_RATE is the index of the first test sample.
SPLIT_RATE = int(min(sample_counts) * TRAIN_FRACTION)

TRAIN_SET = { key: SAMPLES[key][:SPLIT_RATE] for key in LANGUAGES }
TEST_SET = { key: SAMPLES[key][SPLIT_RATE:] for key in LANGUAGES }

## 3 - Translating words

In [10]:
# TRANSLATIONS[origin][target] will hold the matrix that maps origin vectors into the target space.
# Every ordered pair of distinct languages starts as None and is filled in by the next cell.
TRANSLATIONS = {
    origin: { target: None for target in LANGUAGES if target != origin }
    for origin in LANGUAGES
}

In [11]:
for origin, target in it.permutations(LANGUAGES, 2):

    # One row per training sentence (the summed word vectors), same sentence order in both matrices.
    origin_matrix = np.array([vector for _sentence, vector in TRAIN_SET[origin]])
    target_matrix = np.array([vector for _sentence, vector in TRAIN_SET[target]])

    # Closed-form solution of the orthogonal Procrustes problem: with
    # origin_matrix.T @ target_matrix = U S Vt, the map applied to column vectors is V @ U.T.
    left, singular_values, right_t = np.linalg.svd(origin_matrix.T @ target_matrix)

    TRANSLATIONS[origin][target] = right_t.T @ left.T

### List of example words
**Note**: only single words are supported, i.e., compound words like "washing machine" will result in an error.

- English

In [12]:
# Single English words used in the translation examples.
EN_WORD_LIST = ['specification', 'book', 'duckling', 'machine', 'headphones']

- Portuguese

In [13]:
# Single Portuguese words used in the translation examples.
PT_WORD_LIST = ['sapato', 'flor', 'aniversário', 'saudades']

- Spanish

In [14]:
# Single Spanish words used in the translation examples.
ES_WORD_LIST = ['hola', 'sí', 'computadora', 'país']

In [15]:
def translate(word_list, origin_lang, target_lang, topn = 10):
    '''
    Translate each word of a list from one language to another and print the best candidates.

    The vector of each word is mapped into the target language space with
    TRANSLATIONS[origin_lang][target_lang], and the most similar words of the
    target language model are printed.

    Params:
    - word_list: list of example words (single words only).
    - origin_lang: language in which the words in word_list are written
    - target_lang: language you wish to know the translation
    - topn: number of most similar words printed for each word (default 10)

    Return:
    None; the results are printed as a list of (word, similarity) tuples.

    Raises:
    KeyError if a word is not found in the origin_lang model.

    Example of usage:
    translate(PT_WORD_LIST, 'pt', 'es')
    '''

    translator = TRANSLATIONS[origin_lang][target_lang]
    origin_model = MODELS[origin_lang]
    target_model = MODELS[target_lang]

    for word in word_list:
        print("Original word:", word)
        print(f"Top {topn} most similar words in", target_lang)
        print(target_model.most_similar(translator @ origin_model[word], topn = topn))
        print("\n")

### Examples

- Portuguese -> Spanish

In [16]:
translate(PT_WORD_LIST, origin_lang = 'pt', target_lang = 'es')

Original word: sapato
Top 10 most similar words in es
[('zapato', 0.6286161541938782), ('zapatos', 0.5265895128250122), ('abriguito', 0.5155419707298279), ('bolso', 0.5067410469055176), ('vestido', 0.49005553126335144), ('vestidito', 0.485481858253479), ('sapato', 0.48409122228622437), ('calzado', 0.48325690627098083), ('zapatito', 0.4791560173034668), ('calcetin', 0.4784621000289917)]


Original word: flor
Top 10 most similar words in es
[('flor', 0.5606870055198669), ('florecilla', 0.4921896159648895), ('flores.La', 0.4749460220336914), ('peonia', 0.4661574959754944), ('gardenia', 0.4601023197174072), ('macetita', 0.45834779739379883), ('camelia', 0.45742037892341614), ('plantita', 0.4572441875934601), ('flor.Se', 0.45594120025634766), ('rosaY', 0.4555289149284363)]


Original word: aniversário
Top 10 most similar words in es
[('cumpleaños', 0.7255707383155823), ('cumpleaño', 0.643695056438446), ('cumpleños', 0.6012689471244812), ('aniversario', 0.5929712653160095), ('Cumpleaños', 0.

- Portuguese -> English

In [17]:
translate(PT_WORD_LIST, origin_lang = 'pt', target_lang = 'en')

Original word: sapato
Top 10 most similar words in en
[('shoes', 0.4436459243297577), ('shoe', 0.42799580097198486), ('dress', 0.41877812147140503), ('wear', 0.4135258197784424), ('handbag', 0.4094645082950592), ('shoes.', 0.407163143157959), ('high-heels', 0.40656235814094543), ('clothes', 0.4035921096801758), ('accesorize', 0.39818230271339417), ('shoes.It', 0.39019066095352173)]


Original word: flor
Top 10 most similar words in en
[('flower', 0.4548163115978241), ('flowers', 0.44974949955940247), ('peony', 0.4329623281955719), ('flower.', 0.4295286536216736), ('roses', 0.4272284507751465), ('flowers.I', 0.4217860996723175), ('flowers.So', 0.4205949902534485), ('peonies', 0.41655293107032776), ('flower.I', 0.41433611512184143), ('flowers.And', 0.4127655625343323)]


Original word: aniversário
Top 10 most similar words in en
[('birthday', 0.6571257710456848), ('brithday', 0.5902238488197327), ('anniversary', 0.572634756565094), ('half-birthday', 0.5639539361000061), ('birhday', 0.562

- Spanish -> English

In [18]:
translate(ES_WORD_LIST, origin_lang = 'es', target_lang = 'en')

Original word: hola
Top 10 most similar words in en
[('hi', 0.7866629362106323), ('hello', 0.714066207408905), ('Hey', 0.5953907370567322), ('Hello', 0.5875275135040283), ('hello.', 0.5854873061180115), ('hellow', 0.5767785310745239), ('Hi', 0.5712718367576599), ('hey', 0.565157949924469), ('hiya', 0.5630933046340942), ('helllo', 0.5589232444763184)]


Original word: sí
Top 10 most similar words in en
[('if', 0.542435884475708), ('it', 0.524268388748169), ('is', 0.520017683506012), ('so', 0.5113338232040405), ('So', 0.5012075901031494), ('But', 0.4950098693370819), ('.But', 0.4905444383621216), ('but', 0.4886103570461273), ('ok', 0.4771857261657715), ('he', 0.47088468074798584)]


Original word: computadora
Top 10 most similar words in en
[('computer', 0.5488582253456116), ('computers', 0.507689356803894), ('compuer', 0.48534679412841797), ('lap-top', 0.47335493564605713), ('laptop', 0.46047163009643555), ('computer.But', 0.45473140478134155), ('comupter', 0.4428715705871582), ('super-

- English -> Portuguese

In [19]:
translate(EN_WORD_LIST, origin_lang = 'en', target_lang = 'pt')

Original word: specification
Top 10 most similar words in pt
[('especificação', 0.4581889808177948), ('especificaçao', 0.3417055606842041), ('especifcado', 0.3375580906867981), ('padrãode', 0.33236634731292725), ('especiﬁcação', 0.3270868957042694), ('especificação.', 0.3267023265361786), ('certiﬁcação', 0.3259454071521759), ('modelo-base', 0.3258982300758362), ('omodelo', 0.3231895864009857), ('defnição', 0.32275888323783875)]


Original word: book
Top 10 most similar words in pt
[('livro', 0.710349977016449), ('livrinho', 0.5749400854110718), ('olivro', 0.5637545585632324), ('livroO', 0.5563918352127075), ('livroA', 0.5536922216415405), ('livro.O', 0.5533806681632996), ('umlivro', 0.5441447496414185), ('audiolivro', 0.5404638051986694), ('livo', 0.5332615375518799), ('leitura', 0.530726969242096)]


Original word: duckling
Top 10 most similar words in pt
[('patinho', 0.42583686113357544), ('galinha', 0.40748563408851624), ('pintinho', 0.39733073115348816), ('ovinho', 0.39319077134132

## 4 - Translating words using intermediate languages

### Getting the most similar word in each language it passes through
This is the most expensive approach: it calls `most_similar` at every step, so the vector is approximated to an actual word each time.

In [20]:
def intermediate_most_similar_word(word_list, origin_lang, intermediate_lang, target_lang, topn = 10):
    '''
    Translate each word of a list from one language to another passing by an intermediate language.
    In this function, the most similar word found in the intermediate language is used
    as the input of the next translation.

    Params:
    - word_list: list of example words (single words only).
    - origin_lang: language in which the words in word_list are written
    - intermediate_lang: intermediate language which translation between origin_lang and target_lang passes by
    - target_lang: language you wish to know the translation
    - topn: number of most similar target words printed for each word (default 10)

    Return:
    None; the results are printed as a list of (word, similarity) tuples.

    Raises:
    KeyError if a word is not found in the origin_lang model.

    Example of usage:
    intermediate_most_similar_word(PT_WORD_LIST, 'pt', 'en', 'es')
    '''

    to_intermediate = TRANSLATIONS[origin_lang][intermediate_lang]
    to_target = TRANSLATIONS[intermediate_lang][target_lang]

    for word in word_list:
        print("Original word:", word)

        # First hop: keep only the best candidate word of the intermediate language.
        intermediate_word = MODELS[intermediate_lang].most_similar(to_intermediate @ MODELS[origin_lang][word])[0][0]
        print("Most similar word according to intermediate language:", intermediate_word)

        # Second hop: restart from the vector of that intermediate word.
        candidates = MODELS[target_lang].most_similar(to_target @ MODELS[intermediate_lang][intermediate_word], topn = topn)
        print(f"Top {topn} most similar words in target language passing by the intermediate language:")
        print(candidates)

        print("\n")

- Portuguese -> English -> Spanish

In [21]:
intermediate_most_similar_word(PT_WORD_LIST, origin_lang = 'pt', intermediate_lang = 'en', target_lang = 'es')

Original word: sapato
Most similar word according to intermediate language: shoes
Top 10 most similar words in target language passing by the intermediate language:
[('zapatos', 0.637431800365448), ('sandalias', 0.5835506319999695), ('zapatillas', 0.5786840915679932), ('botas', 0.5409711003303528), ('calzado', 0.5358108282089233), ('chanclas', 0.5125232934951782), ('calcetines', 0.5100558996200562), ('mocasines', 0.5036314129829407), ('tacones', 0.4987330436706543), ('sneakers', 0.49735501408576965)]


Original word: flor
Most similar word according to intermediate language: flower
Top 10 most similar words in target language passing by the intermediate language:
[('flor', 0.5699925422668457), ('peonía', 0.5194132924079895), ('flores', 0.5110710859298706), ('gerbera', 0.46887627243995667), ('lavanda', 0.4660568833351135), ('florecilla', 0.46439895033836365), ('crisantemo', 0.46398526430130005), ('floral', 0.4629462957382202), ('peonías', 0.4608529806137085), ('flores.La', 0.45880639553

- Spanish -> Portuguese -> English

In [22]:
intermediate_most_similar_word(ES_WORD_LIST, origin_lang = 'es', intermediate_lang = 'pt', target_lang = 'en')

Original word: hola
Most similar word according to intermediate language: olá
Top 10 most similar words in target language passing by the intermediate language:
[('hi', 0.7999252676963806), ('hello', 0.7333807349205017), ('hey', 0.6241604089736938), ('hello.', 0.5988674759864807), ('hellow', 0.5984309911727905), ('Hey', 0.5948676466941833), ('helllo', 0.5929532051086426), ('hi.', 0.5902332663536072), ('Hi', 0.5745431780815125), ('Hello', 0.5743246078491211)]


Original word: sí
Most similar word according to intermediate language: só
Top 10 most similar words in target language passing by the intermediate language:
[('if', 0.6177825331687927), ('it', 0.5477291941642761), ('is', 0.5076309442520142), ('So', 0.5075761675834656), ('me', 0.5029937624931335), ('so', 0.5009217858314514), ('But', 0.4981030821800232), ('I', 0.486585795879364), ('but', 0.48316681385040283), (',', 0.48077139258384705)]


Original word: computadora
Most similar word according to intermediate language: computador
T

- English -> Spanish -> Portuguese

In [23]:
intermediate_most_similar_word(EN_WORD_LIST, origin_lang = 'en', intermediate_lang = 'es', target_lang = 'pt')

Original word: specification
Most similar word according to intermediate language: especificación
Top 10 most similar words in target language passing by the intermediate language:
[('especificação', 0.5933482050895691), ('descrição', 0.5040943622589111), ('defnição', 0.49539458751678467), ('definição', 0.4773234724998474), ('especiﬁcação', 0.4666079878807068), ('exaustividade', 0.4636813998222351), ('especicação', 0.46293625235557556), ('composicionalidade', 0.45184749364852905), ('especificada', 0.4507888853549957), ('parametrização', 0.44758057594299316)]


Original word: book
Most similar word according to intermediate language: libro
Top 10 most similar words in target language passing by the intermediate language:
[('livro', 0.8350896835327148), ('livrinho', 0.6735722422599792), ('livro.O', 0.6695076823234558), ('livroO', 0.6516822576522827), ('livro.Este', 0.6418003439903259), ('olivro', 0.6386768221855164), ('livro.E', 0.6327352523803711), ('umlivro', 0.6269291639328003), ('liv

### Using the vector transformed into each subspace
Calls `most_similar` only once, so the vector is approximated to an actual word just one time, at the end.

In [24]:
def intermediate_most_similar_vector(word_list, origin_lang, intermediate_lang, target_lang, topn = 10):
    '''
    Translate each word of a list from one language to another passing by an intermediate language.
    In this function, the vector translated into the intermediate language is used directly
    as the input of the next translation, without approximating it to an intermediate word.

    Params:
    - word_list: list of example words (single words only).
    - origin_lang: language in which the words in word_list are written
    - intermediate_lang: intermediate language which translation between origin_lang and target_lang passes by
    - target_lang: language you wish to know the translation
    - topn: number of most similar target words printed for each word (default 10)

    Return:
    None; the results are printed as a list of (word, similarity) tuples.

    Raises:
    KeyError if a word is not found in the origin_lang model.

    Example of usage:
    intermediate_most_similar_vector(PT_WORD_LIST, 'pt', 'en', 'es')
    '''

    to_intermediate = TRANSLATIONS[origin_lang][intermediate_lang]
    to_target = TRANSLATIONS[intermediate_lang][target_lang]

    for word in word_list:
        print("Original word:", word)

        intermediate_vector = to_intermediate @ MODELS[origin_lang][word]

        # The only nearest-neighbour search happens here, in the target language.
        candidates = MODELS[target_lang].most_similar(to_target @ intermediate_vector, topn = topn)
        print(f"Top {topn} most similar words in target language passing by the intermediate language:")
        print(candidates)

        print("\n")

- Portuguese -> English -> Spanish

In [25]:
intermediate_most_similar_vector(PT_WORD_LIST, origin_lang = 'pt', intermediate_lang = 'en', target_lang = 'es')

Original word: sapato
Top 10 most similar words in target language passing by the intermediate language:
[('zapato', 0.48993510007858276), ('abriguito', 0.47686344385147095), ('vestido', 0.47471749782562256), ('zapatos', 0.4711134433746338), ('tacones', 0.44870543479919434), ('bolso', 0.44113263487815857), ('collarcito', 0.4388626515865326), ('jerséy', 0.431425005197525), ('sapato', 0.43105655908584595), ('pantalón', 0.42974868416786194)]


Original word: flor
Top 10 most similar words in target language passing by the intermediate language:
[('flor', 0.5474237203598022), ('peonia', 0.4889832139015198), ('rosaa', 0.48725426197052), ('florecilla', 0.47480836510658264), ('azalea', 0.47139185667037964), ('rosaY', 0.467237263917923), ('rosa', 0.4639436900615692), ('peonía', 0.456820547580719), ('florecita', 0.45346006751060486), ('hortensia', 0.45051074028015137)]


Original word: aniversário
Top 10 most similar words in target language passing by the intermediate language:
[('cumpleaños',

- Spanish -> Portuguese -> English

In [26]:
intermediate_most_similar_vector(ES_WORD_LIST, origin_lang = 'es', intermediate_lang = 'pt', target_lang = 'en')

Original word: hola
Top 10 most similar words in target language passing by the intermediate language:
[('hi', 0.7226719856262207), ('hello', 0.6594656705856323), ('hey', 0.5860802531242371), ('Hey', 0.54866623878479), ('helllo', 0.5457444190979004), ('hellow', 0.5423814058303833), ('hi.', 0.541397750377655), ('hello.', 0.5363135933876038), ('Hello', 0.532040536403656), ('hiya', 0.5247058272361755)]


Original word: sí
Top 10 most similar words in target language passing by the intermediate language:
[('if', 0.531143069267273), ('is', 0.5029851794242859), ('But', 0.4877713620662689), ('but', 0.4814220368862152), ('.But', 0.47917017340660095), ('it', 0.46871185302734375), ('so', 0.46371883153915405), ('So', 0.45661598443984985), ('.So', 0.4446946978569031), ('ok', 0.4432774782180786)]


Original word: computadora
Top 10 most similar words in target language passing by the intermediate language:
[('computer', 0.5246008038520813), ('computers', 0.45993557572364807), ('non-computer', 0.441

- English -> Spanish -> Portuguese

In [27]:
intermediate_most_similar_vector(EN_WORD_LIST, origin_lang = 'en', intermediate_lang = 'es', target_lang = 'pt')

Original word: specification
Top 10 most similar words in target language passing by the intermediate language:
[('especificação', 0.4372689127922058), ('especifcado', 0.38677334785461426), ('ante-projeto', 0.3675701320171356), ('especificaçao', 0.3611995279788971), ('certiﬁcado', 0.358938992023468), ('defnição', 0.3587265610694885), ('adota-se', 0.35393160581588745), ('ante-projecto', 0.3494497537612915), ('especíca', 0.3478655219078064), ('certiﬁcação', 0.3470357060432434)]


Original word: book
Top 10 most similar words in target language passing by the intermediate language:
[('livro', 0.6597116589546204), ('livro.O', 0.5464816093444824), ('livroO', 0.5437262058258057), ('livrinho', 0.5405091643333435), ('livroA', 0.5388786792755127), ('umlivro', 0.530348002910614), ('leitura', 0.5287427306175232), ('olivro', 0.5211440324783325), ('livro.E', 0.5197508931159973), ('autora', 0.5100651979446411)]


Original word: duckling
Top 10 most similar words in target language passing by the int

## 5 - Evaluate

### Cosine similarity
Theoretically speaking, translating the vector that represents a sentence into another language should result in a vector similar to the one that represents the corresponding sentence in that language.

In [28]:
def evaluate_single_cosine_similarity(origin_lang, target_lang, n_samples = 5):
    '''
    Evaluate cosine similarity between single sentences of the test set.
    Cosine similarity has an interval from -1 to 1, and the closer to 1 the value is,
    the more similar the two vectors are.

    For each of the first n_samples test sentences, the origin sentence vector is translated
    with TRANSLATIONS[origin_lang][target_lang] and compared with the vector of the sentence
    at the same position in the target language test set.

    Params:
    - origin_lang: language of the sentences to be translated
    - target_lang: language the sentences are translated into
    - n_samples: number of test sentences to evaluate (default 5); if the test set
      has fewer sentences, all of them are evaluated

    Return:
    None; each sentence pair and its cosine similarity are printed.

    Example of usage:
    evaluate_single_cosine_similarity('pt', 'en')
    '''

    translator = TRANSLATIONS[origin_lang][target_lang]

    # Slicing (instead of indexing a fixed range) avoids an IndexError on small test sets.
    sentence_pairs = zip(TEST_SET[origin_lang][:n_samples], TEST_SET[target_lang][:n_samples])

    for (origin_sentence, origin_vector), (target_sentence, vector_target) in sentence_pairs:
        print(origin_sentence, '->', target_sentence)

        vector_translated = translator @ origin_vector

        print("Cossine similarity:", cosine_similarity([vector_translated], [vector_target])[0][0], "\n")

- Portuguese -> English

In [29]:
evaluate_single_cosine_similarity(origin_lang = 'pt', target_lang = 'en')

onde é que eu deveria ir jantar em albufeira hoje à noite -> where should i go for dinner in san francisco tonight
Cossine similarity: 0.84695405 

qual é o melhor restaurante na maia -> what is the best shop restaurant around angel
Cossine similarity: 0.8288503 

recomenda me lojas para fazer compras em coimbra -> recommend me some shops to shopping in angel
Cossine similarity: 0.8608927 

o que há a acontecer hoje na cidade esta noite -> what's happening in town tonight
Cossine similarity: 0.7119084 

que eventos estão a acontecer perto de mim -> what events are going on near me
Cossine similarity: 0.8129816 



- Portuguese -> Spanish

In [30]:
evaluate_single_cosine_similarity(origin_lang = 'pt', target_lang = 'es')

onde é que eu deveria ir jantar em albufeira hoje à noite -> dónde debería cenar esta noche en madrid
Cossine similarity: 0.7200791 

qual é o melhor restaurante na maia -> cuál es la mejor tienda restaurante alrededores de colón
Cossine similarity: 0.8510599 

recomenda me lojas para fazer compras em coimbra -> recomendarme algunas tiendas para ir de compras en malasaña
Cossine similarity: 0.72395414 

o que há a acontecer hoje na cidade esta noite -> que esta pasando en el pueblo esta noche
Cossine similarity: 0.86754334 

que eventos estão a acontecer perto de mim -> qué eventos están sucediendo cerca de mí
Cossine similarity: 0.8847584 



- English -> Spanish

In [31]:
evaluate_single_cosine_similarity(origin_lang = 'en', target_lang = 'es')

where should i go for dinner in san francisco tonight -> dónde debería cenar esta noche en madrid
Cossine similarity: 0.78502417 

what is the best shop restaurant around angel -> cuál es la mejor tienda restaurante alrededores de colón
Cossine similarity: 0.84819996 

recommend me some shops to shopping in angel -> recomendarme algunas tiendas para ir de compras en malasaña
Cossine similarity: 0.8010326 

what's happening in town tonight -> que esta pasando en el pueblo esta noche
Cossine similarity: 0.7965214 

what events are going on near me -> qué eventos están sucediendo cerca de mí
Cossine similarity: 0.8242764 



In [32]:
def pairwise(iterable):
    '''
    Iterate over consecutive, overlapping pairs of the input iterable.
    An input with n values yields n - 1 pairs; an input with fewer than two values yields nothing.
    pairwise('ABCDEFG') --> AB BC CD DE EF FG

    Source: https://docs.python.org/3/library/itertools.html#itertools.pairwise
    '''
    # Two independent iterators over the same values; the second one runs one step ahead.
    current_values, next_values = it.tee(iterable, 2)
    next(next_values, None)
    return zip(current_values, next_values)

In [33]:
def avaliate_path(path):
    '''
    Evaluate the average cosine similarity of a translation path over the test set.

    The translation matrices of every consecutive pair of languages in the path are
    composed into a single matrix. Each test vector of the first language is translated
    with it and compared with the test vector at the same position in the last language.

    Params:
    - path: list of languages the translation passes by, from origin to target

    Return:
    None; the average cosine similarity is printed.

    Raises:
    ValueError if the test set of the first language of the path is empty.

    Example of usage:
    avaliate_path(['pt', 'en', 'es'])
    '''

    origin_samples = TEST_SET[path[0]]
    target_samples = TEST_SET[path[-1]]

    if not origin_samples:
        raise ValueError("TEST_SET has no samples for language " + repr(path[0]))

    # Start from the identity, sized from the data instead of a hard-coded dimension.
    translation_matrix = np.identity(len(origin_samples[0][1]))

    # Later hops are applied on the left, so the first hop of the path acts on the vector first.
    for (origin, target) in pairwise(path):
        translation_matrix = TRANSLATIONS[origin][target] @ translation_matrix

    similarities = [
        cosine_similarity([translation_matrix @ origin_vector], [target_vector])[0][0]
        for (_, origin_vector), (_, target_vector) in zip(origin_samples, target_samples)
    ]

    mean_cos_sim = sum(similarities) / len(origin_samples)

    print("Average cosine similarity of path", path, "=", mean_cos_sim)


- Portuguese -> Spanish

In [34]:
avaliate_path(path = ['pt', 'es'])

Average cosine similarity of path ['pt', 'es'] = 0.7711279304783111


- Portuguese -> English -> Spanish

In [35]:
avaliate_path(path = ['pt', 'en', 'es'])

Average cosine similarity of path ['pt', 'en', 'es'] = 0.7465551240657466


- Portuguese -> English -> Portuguese -> English -> Spanish

In [36]:
avaliate_path(path = ['pt', 'en', 'pt', 'en', 'es'])

Average cosine similarity of path ['pt', 'en', 'pt', 'en', 'es'] = 0.7465551168871883
