In [1]:
# -*- coding: utf-8 -*-

import sys
import nltk
import numpy as np
import pandas as pd
import pickle5 as pickle

from nltk import meteor_score
from scipy import stats
from scipy.spatial.distance import euclidean
import pulp

from collections import defaultdict
from itertools import product

import torch
from torch.autograd import Variable


In [None]:
# Pick the compute device once; `use_GPU` and `device` are module-level globals.
use_GPU = torch.cuda.is_available()
if use_GPU:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device: ' + str(device))

if use_GPU:
    # Only the CUDA generator is seeded here; nothing is seeded on a CPU-only run.
    torch.cuda.manual_seed(0)
    gpu_name = torch.cuda.get_device_name(0)
    print('GPU: ' + str(gpu_name))
print("Using GPU: {}".format(use_GPU))

# Data Exploration
### WMT-17

In [2]:
def load_obj(data_path, name):
    """Unpickle ``data_path + name + '.pkl'`` and report its size.

    ``data_path`` is used as a plain string prefix: no path separator is
    inserted between it and ``name``, so it may end in a partial file name.

    Args:
        data_path: String prefix of the pickle file's path.
        name: Middle part of the file name; also used in the printed summary.

    Returns:
        The unpickled object. ``len()`` is called on it for the summary line,
        so it must be a sized container.
    """
    pickle_file = data_path + name + '.pkl'
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    with open(pickle_file, 'rb') as handle:
        data = pickle.load(handle)
    print(f"{name} has {len(data)}")
    return data

In [3]:
# Language-pair codes of the pickled WMT-17 files; also the keys used for the
# `data` and `collections` dictionaries built in the following cells.
lang = [
    "csen", "deen", "enru", "enzh",
    "fien", "lven", "ruen", "zhen",
]

# String prefix of every pickle file (see load_obj), anchored at sys.path[0].
data_path = sys.path[0] + "/wmt17-processed-data/final_"

In [4]:
# One unpickled dataset per language pair, keyed by the pair code.
data = {pair: load_obj(data_path, pair) for pair in lang}

csen has 3005
deen has 3004
enru has 3001
enzh has 2001
fien has 3002
lven has 2001
ruen has 3001
zhen has 2001


In [5]:
# Flatten every human-scored sample into one record per human judgement.
# Each record is [i[1], i[2], k[0], k[1]]; compute_WMD_WMDo reads these
# positions as [src, ref, MT, score].
nums = 0
collections = {}

for l in lang:
    # Samples whose judgement list (index 3) is empty contribute no records.
    score_lang = [
        [sample[1], sample[2], judgement[0], judgement[1]]
        for sample in data[l].values()
        for judgement in sample[3]
    ]
    num_sens_lang = len(score_lang)

    nums += num_sens_lang
    collections[l] = score_lang
    print(f"{l}: {num_sens_lang}")

print(nums)

csen: 560
deen: 560
enru: 560
enzh: 560
fien: 560
lven: 560
ruen: 560
zhen: 560
4480


In [6]:
# lang = "deen"
# src = [sample[0] for sample in collections[lang]]
# ref = [sample[1] for sample in collections[lang]]
# MT = [sample[2] for sample in collections[lang]]
# score = [sample[3] for sample in collections[lang]]

# WMD
### Bert

In [8]:
def bert_tokenization(txt, tokenizer):
    """Tokenize ``txt`` wrapped in ``[CLS]`` / ``[SEP]`` and build model inputs.

    Args:
        txt: Raw sentence string.
        tokenizer: Object exposing ``tokenize(str)`` and
            ``convert_tokens_to_ids(list)``.

    Returns:
        A 3-tuple ``(tokens, indexed_tokens, segments_ids)``:
        * ``tokens`` -- the token list without its first and last element
          (the wrapper markers, assuming the tokenizer keeps each as one token);
        * ``indexed_tokens`` -- ids of *all* tokens, with a leading batch axis;
        * ``segments_ids`` -- zeros with the same shape as ``indexed_tokens``.
    """
    wrapped = tokenizer.tokenize("[CLS] " + txt + " [SEP]")

    token_ids = tokenizer.convert_tokens_to_ids(wrapped)
    indexed_tokens = torch.tensor(token_ids).unsqueeze(0)

    # Single-sentence input: every position belongs to segment 0.
    segments_ids = torch.tensor([0] * len(wrapped)).unsqueeze(0)

    inner_tokens = wrapped[1:-1]
    return inner_tokens, indexed_tokens, segments_ids

In [9]:
### Capture each encoder layer's output
def layer_processing(model):
    """Hook every module in ``model.encoder.layer`` and collect its outputs.

    A forward hook is registered on each element of ``model.encoder.layer``.
    Whenever such a module runs, the first element of its output is appended
    to the returned list.

    The list is never cleared here, so it grows by one entry per hooked module
    on every forward pass; callers that want only the most recent pass must
    slice from the end (or empty the list themselves). The hook handles are
    not kept, so the hooks stay attached for the lifetime of the model.

    Args:
        model: Object whose ``encoder.layer`` is an iterable of torch modules.

    Returns:
        The (initially empty) list that the hooks append to.
    """
    captured = []

    def _record(module, input_, output):
        # Only the first element of the module's output is stored.
        captured.append(output[0])

    for encoder_block in model.encoder.layer:
        encoder_block.register_forward_hook(_record)

    return captured

In [10]:
from transformers import BertModel, BertTokenizer

# Tokenizer and encoder are loaded from the same pretrained checkpoint.
BERT_CHECKPOINT = "bert-base-multilingual-cased"
bert_tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT, return_dict=True)

# The static embedding table is reachable as bert_model.embeddings.word_embeddings.
bert_model.eval()  # switch the module to evaluation mode
print()  # prints a blank line; presumably keeps the cell from echoing the model repr




In [11]:
# Register the forward hooks on bert_model. `layers` is the shared list the
# hooks append to on every forward pass; embedding_processing reads it as a
# module-level global (embed_type == 2).
layers = layer_processing(bert_model)

# XLM-RoBERTa

In [12]:
# from transformers import AutoTokenizer, AutoModel
# xlm_r_tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
# xlm_r_model = AutoModel.from_pretrained("xlm-roberta-base",return_dict=True)
# xlm_r_model.eval()
# # xlm_r_model.embeddings.word_embeddings
# print()

In [13]:
# layers = layer_processing(xlm_r_model)

# Meteor Score

In [14]:
from nltk import meteor_score
from nltk.stem.porter import PorterStemmer
from nltk.corpus import wordnet
from itertools import chain, product

def order_penalty(
    reference,
    hypothesis,
    preprocess=str.lower,
    stemmer=PorterStemmer(),
    wordnet=wordnet):
    """Return the fragmentation fraction (chunks / matches) of a hypothesis.

    The hypothesis and reference are aligned with NLTK's METEOR word matcher,
    and the number of chunks in that alignment is divided by the number of
    matched words.

    NOTE(review): this relies on private helpers of ``nltk``'s
    ``meteor_score`` module (``_generate_enums``, ``_enum_allign_words``,
    ``_count_chunks``), so it is presumably tied to the NLTK version the
    notebook was written against -- confirm before upgrading NLTK.

    Args:
        reference: Reference sentence (string).
        hypothesis: Hypothesis sentence (string).
        preprocess: Callable applied by NLTK before enumerating the words.
        stemmer: Stemmer handed to the matcher.
        wordnet: WordNet corpus reader handed to the matcher. It is now
            forwarded; previously this argument was accepted but ignored.

    Returns:
        ``chunk_count / matches_count`` as a float, or ``1`` when no word of
        the hypothesis matches the reference.
    """
    enum_hypothesis, enum_reference = meteor_score._generate_enums(
        hypothesis, reference, preprocess=preprocess
    )

    matches, _, _ = meteor_score._enum_allign_words(
        enum_hypothesis, enum_reference, stemmer=stemmer, wordnet=wordnet
    )

    matches_count = len(matches)
    if matches_count == 0:  # No unigrams match
        return 1

    chunk_count = float(meteor_score._count_chunks(matches))
    return chunk_count / matches_count


In [15]:
## Weights for the static model embedding: one bucket per distinct token.
def tokens_to_fracdict(tokens):
    """Map each distinct token to its relative frequency in ``tokens``.

    Args:
        tokens: Iterable of hashable tokens.

    Returns:
        Dict ``{token: count / total}`` in first-occurrence order; an empty
        dict when ``tokens`` is empty.
    """
    counts = {}
    for token in tokens:
        counts[token] = counts.get(token, 0) + 1

    total = sum(counts.values())
    fractions = {}
    for token, count in counts.items():
        fractions[token] = float(count) / total
    return fractions


## Weights for the model output states, which are contextual:
## every token occurrence is treated as distinct, even when the same token repeats.
def tokens_to_fracdict_contextual(tokens):
    """Give every token *position* the same weight ``1 / len(tokens)``.

    Unlike ``tokens_to_fracdict`` the keys are the positions ``0..n-1``, not
    the tokens themselves, so repeated tokens keep separate entries.

    Args:
        tokens: Sized sequence of tokens; only its length is used.

    Returns:
        Dict ``{position: 1 / n}``; an empty dict when ``tokens`` is empty.
    """
    n_tokens = len(tokens)
    if n_tokens == 0:
        return {}
    return dict.fromkeys(range(n_tokens), 1 / n_tokens)

In [16]:
## Two components of the model can be used as token embeddings:
## 1) the static model embedding table (embed_type == 1)
## 2) the model's output states (embed_type == 2)

def _static_embedding(buckets, tokenizer, model):
    """Look up the word-embedding table rows for the distinct tokens in ``buckets``."""
    token_ids = tokenizer.convert_tokens_to_ids(list(buckets.keys()))
    return model.embeddings.word_embeddings(torch.tensor(token_ids))


def _contextual_embedding(sentence, tokenizer, model):
    """Run one forward pass and average the last four hooked layer outputs per token."""
    encoded = tokenizer(sentence, return_tensors="pt")

    # Drop activations of earlier forward passes: the hooks only ever append,
    # so without this the shared list grows with every sentence scored.
    layers.clear()
    model(encoded['input_ids'])

    if not layers:
        # Nothing captured: the hooks feeding `layers` are not attached to
        # this model, so there is nothing valid to average.
        raise RuntimeError(
            "no layer outputs were captured for this model; "
            "register the hooks with layer_processing(model) first"
        )

    return torch.mean(torch.stack(layers[-4:]).squeeze(1).permute(1, 0, 2), dim=1)


def _strip_special_tokens(embedding, n_tokens):
    """Drop the first and last row when the embedding has exactly two extra rows."""
    if embedding.size()[0] - 2 == n_tokens:
        return embedding[1:-1, :]  # Remove bos and eos tokens
    return embedding


def embedding_processing(sent1, sent2, tokenizer, model, embed_type):
    """Build the token weights and token embeddings for a sentence pair.

    Two embedding sources are supported:

    * ``embed_type == 1`` -- rows of ``model.embeddings.word_embeddings`` for
      the *distinct* tokens of each sentence, weighted by relative frequency
      (``tokens_to_fracdict``).
    * ``embed_type == 2`` -- one vector per token *position*, obtained by
      averaging the last four entries that the forward hooks stored in the
      module-level ``layers`` list, with uniform weights
      (``tokens_to_fracdict_contextual``). This assumes the hooks were
      registered on ``model`` via ``layer_processing``.

    All model calls run under ``torch.no_grad()`` since only inference values
    are needed, and ``layers`` is emptied before each forward pass so it does
    not accumulate tensors across calls.

    Args:
        sent1: First sentence (string).
        sent2: Second sentence (string).
        tokenizer: Tokenizer providing ``tokenize``, ``convert_tokens_to_ids``
            and ``__call__(..., return_tensors="pt")``.
        model: Encoder matching ``tokenizer``.
        embed_type: ``1`` or ``2`` (see above).

    Returns:
        ``(sent1_buckets, sent2_buckets, all_embedding)`` where
        ``all_embedding`` is a CPU tensor holding the rows of ``sent1``
        followed by the rows of ``sent2``, one row per bucket.

    Raises:
        ValueError: If ``embed_type`` is neither 1 nor 2.
        RuntimeError: If ``embed_type == 2`` and no layer output was captured.
        AssertionError: If the number of buckets and embedding rows disagree.
    """
    sent1_tokens = tokenizer.tokenize(sent1)
    sent2_tokens = tokenizer.tokenize(sent2)

    if embed_type == 1:
        sent1_buckets = tokens_to_fracdict(sent1_tokens)
        sent2_buckets = tokens_to_fracdict(sent2_tokens)

        with torch.no_grad():
            sent1_embedding = _static_embedding(sent1_buckets, tokenizer, model)
            sent2_embedding = _static_embedding(sent2_buckets, tokenizer, model)

    elif embed_type == 2:
        sent1_buckets = tokens_to_fracdict_contextual(sent1_tokens)
        sent2_buckets = tokens_to_fracdict_contextual(sent2_tokens)

        with torch.no_grad():
            sent1_embedding = _contextual_embedding(sent1, tokenizer, model)
            sent2_embedding = _contextual_embedding(sent2, tokenizer, model)

    else:
        raise ValueError(f"embed_type must be 1 or 2, got {embed_type!r}")

    # The tokenizer call adds two tokens that tokenize() does not produce;
    # remove their rows so the embeddings line up with the buckets.
    sent1_embedding = _strip_special_tokens(sent1_embedding, len(sent1_tokens))
    sent2_embedding = _strip_special_tokens(sent2_embedding, len(sent2_tokens))

    all_embedding = torch.cat([sent1_embedding, sent2_embedding])

    assert len(sent1_buckets) + len(sent2_buckets) == all_embedding.size()[0]

    return sent1_buckets, sent2_buckets, all_embedding.cpu()

In [17]:
def word_mover_distance_probspec(sent1_buckets, sent2_buckets, all_embedding, lpFile=None,):
    """Set up and solve the Word Mover's Distance transport problem with PuLP.

    Every bucket of the first sentence becomes a source node ``x<i>`` and
    every bucket of the second sentence a sink node ``y<j>``. The flow
    ``T[x, y]`` is non-negative, each source must ship exactly its weight,
    each sink must receive exactly its weight, and the objective is the total
    flow weighted by the Euclidean distance between the node embeddings.

    Only the source-to-sink flows are modelled. Flow variables between other
    node pairs would be unconstrained and carry a non-negative cost, so they
    never change the optimal objective; leaving them out avoids creating
    (n + m)^2 variables and distance evaluations where n * m suffice.

    Args:
        sent1_buckets: Dict whose values are the weights of the first sentence.
        sent2_buckets: Dict whose values are the weights of the second sentence.
        all_embedding: Tensor with one row per bucket, the rows of the first
            sentence followed by those of the second.
        lpFile: Optional path; when given, the LP is written there before solving.

    Returns:
        The solved ``pulp.LpProblem``; read the distance from ``prob.objective``.

    Raises:
        AssertionError: If the number of buckets and embedding rows disagree.
    """
    # Updated buckets with labeled name
    first_sent_buckets = {f"x{idx}": weight for idx, weight in enumerate(sent1_buckets.values())}
    second_sent_buckets = {f"y{idx}": weight for idx, weight in enumerate(sent2_buckets.values())}

    var_names = list(first_sent_buckets.keys()) + list(second_sent_buckets.keys())

    assert len(var_names) == all_embedding.size(0)

    wordvecs = {token: embedding.detach().numpy() for token, embedding in zip(var_names, all_embedding)}

    # One flow variable per (source, sink) pair.
    flow_pairs = list(product(first_sent_buckets, second_sent_buckets))
    T = pulp.LpVariable.dicts('T_matrix', flow_pairs, lowBound=0)

    prob = pulp.LpProblem('WMD', sense=pulp.LpMinimize)

    # Objective: total transport cost.
    prob += pulp.lpSum([T[token1, token2] * euclidean(wordvecs[token1], wordvecs[token2])
                        for token1, token2 in flow_pairs])

    for token2 in second_sent_buckets:   # each sink receives exactly its weight
        prob += pulp.lpSum([T[token1, token2] for token1 in first_sent_buckets]) == second_sent_buckets[token2]

    for token1 in first_sent_buckets:    # each source ships exactly its weight
        prob += pulp.lpSum([T[token1, token2] for token2 in second_sent_buckets]) == first_sent_buckets[token1]

    if lpFile is not None:
        prob.writeLP(lpFile)

    prob.solve()

    return prob

In [18]:
def word_mover_distance(sent1, sent2, tokenizer, model, embed_type, lpFile=None):
    """Compute the Word Mover's Distance between two sentences.

    The weights and embeddings come from ``embedding_processing``; the
    transport problem is solved by ``word_mover_distance_probspec``.

    Args:
        sent1: First sentence (string).
        sent2: Second sentence (string).
        tokenizer: Tokenizer passed through to ``embedding_processing``.
        model: Model passed through to ``embedding_processing``.
        embed_type: Embedding source selector passed through unchanged.
        lpFile: Optional path to which the LP is written.

    Returns:
        The value of the solved problem's objective.
    """
    buckets_and_vectors = embedding_processing(sent1, sent2, tokenizer, model, embed_type)
    solved = word_mover_distance_probspec(*buckets_and_vectors, lpFile=lpFile)
    return pulp.value(solved.objective)

In [19]:
def fluency_based_wmd(wmd, ref, hypo, gamma=0.2):
    """Adjust a WMD score by the word-order fragmentation of the hypothesis.

    Computes ``wmd - gamma * (0.5 - order_penalty(ref, hypo))``: the score is
    lowered when the fragmentation value is below 0.5 and raised when it is
    above.

    Args:
        wmd: Plain Word Mover's Distance of the pair.
        ref: Reference sentence (string).
        hypo: Hypothesis sentence (string).
        gamma: Weight of the adjustment term.

    Returns:
        The adjusted distance.
    """
    fragmentation = order_penalty(ref, hypo)
    adjustment = gamma * (0.5 - fragmentation)
    return wmd - adjustment

# Evaluation

In [20]:
def compute_WMD_WMDo(sents, tokenizer, model, embed_type, cross_lingual=False):
    """Score every sample with WMD and its fluency-adjusted variant.

    Each sample is laid out as ``[src, ref, MT, score]``. The machine
    translation is compared with the source when ``cross_lingual`` is truthy
    and with the reference otherwise.

    Args:
        sents: Sequence of samples in the layout above.
        tokenizer: Tokenizer handed to ``word_mover_distance``.
        model: Model handed to ``word_mover_distance``.
        embed_type: Embedding source selector handed to ``word_mover_distance``.
        cross_lingual: Compare against the source instead of the reference.

    Returns:
        Three parallel lists ``(wmd, wmdo, score)``; ``score`` holds the human
        scores converted to ``float``.
    """
    wmd, wmdo, score = [], [], []
    anchor_idx = 0 if cross_lingual else 1   # 0: src - mt, 1: ref - mt

    for position, sample in enumerate(sents):
        hypo = sample[2]
        ref = sample[anchor_idx]

        plain = word_mover_distance(ref, hypo, tokenizer, model, embed_type)
        adjusted = fluency_based_wmd(plain, ref, hypo)

        wmd.append(plain)
        wmdo.append(adjusted)
        score.append(float(sample[3]))

        # Progress indicator every 100 samples.
        if position % 100 == 0:
            print(position)

    return wmd, wmdo, score

In [21]:
def evaluation(wmd, wmdo, score):
    """Correlate both metric variants with the human scores and print the results.

    Args:
        wmd: Plain WMD values.
        wmdo: Fluency-adjusted WMD values.
        score: Human scores, aligned with ``wmd`` and ``wmdo``.

    Returns:
        ``(pearson, pearson_o, spearman, spearman_o)`` -- the results of
        ``scipy.stats.pearsonr`` / ``spearmanr`` for ``wmd`` and ``wmdo``.
    """
    metrics = (wmd, wmdo)
    pearson, pearson_o = [stats.pearsonr(values, score) for values in metrics]
    spearman, spearman_o = [stats.spearmanr(values, score) for values in metrics]

    print("Spearman Correlation:", spearman, spearman_o)
    print("Pearson Correlation:", pearson, pearson_o)

    return pearson, pearson_o, spearman, spearman_o

In [22]:
# Test Src - MT

In [23]:
# Source-vs-MT scoring (cross_lingual=True) of the lv-en samples, using the
# hooked layer outputs as embeddings (embed_type=2).
wmd, wmdo, score = compute_WMD_WMDo(collections["lven"], bert_tokenizer, bert_model, embed_type=2, cross_lingual=True)

0
100
200
300
400
500


In [24]:
# The scores reported here were computed from collections["lven"], so the
# label names that language pair (it previously said "German").
print("lv-en")
print(f"Average WMD: {sum(wmd)/len(wmd)}")
print(f"Average WMDo: {sum(wmdo)/len(wmdo)}")
_,_,_,_ = evaluation(wmd, wmdo, score)

German
Average WMD: 19.45333163866935
Average WMDo: 19.547121179485675
Spearman Correlation: SpearmanrResult(correlation=-0.07554353990742502, pvalue=0.0740595906766064) SpearmanrResult(correlation=-0.07400342475581874, pvalue=0.08016536079430812)
Pearson Correlation: (-0.05155513853432033, 0.2231854593405953) (-0.05000084606885026, 0.23747121082738024)


In [25]:
### Bert

## Non hidden layers
# German
# Average WMD: 1.1278919187622316
# Average WMDo: 1.1550826268636547
# Spearman Correlation: SpearmanrResult(correlation=-0.10812368206349056, pvalue=0.01045317863165258) SpearmanrResult(correlation=-0.09288235704104379, pvalue=0.027960552416333505)
# Pearson Correlation: (-0.12908210458376235, 0.0022087308197858723) (-0.12325126049669459, 0.0034859020949488083)

## last hidden layer
# German
# Average WMD: 14.351269957197045
# Average WMDo: 14.378460665298482
# Spearman Correlation: SpearmanrResult(correlation=-0.2696137651678008, pvalue=8.78723772886295e-11) SpearmanrResult(correlation=-0.26906964444584147, pvalue=9.61577344615588e-11)
# Pearson Correlation: (-0.2650495517164887, 1.8594414641329894e-10) (-0.2651203733059849, 1.83813906763206e-10)

## Last Four Layers
# German
# Average WMD: 15.908297730517315
# Average WMDo: 15.93548843861872
# Spearman Correlation: SpearmanrResult(correlation=-0.3359035629213468, pvalue=3.090735442641185e-16) SpearmanrResult(correlation=-0.3366638149811885, pvalue=2.6257508005501394e-16)
# Pearson Correlation: (-0.32847915287528817, 1.4835516049568652e-15) (-0.3297559748812626, 1.1362222688075229e-15)

## Last six layers
# German
# Average WMD: 15.260092905704722
# Average WMDo: 15.287283613806162
# Spearman Correlation: SpearmanrResult(correlation=-0.3207721143425658, pvalue=7.228071509687232e-15) SpearmanrResult(correlation=-0.32236450280225937, pvalue=5.230437222396459e-15)
# Pearson Correlation: (-0.31662830168951356, 1.6622845823626887e-14) (-0.3180128624110832, 1.2603213228708016e-14)

## Last number 8-10 Layers [-5:-3]
# German
# Average WMD: 16.781514336055146
# Average WMDo: 16.808705044156596
# Spearman Correlation: SpearmanrResult(correlation=-0.3297802061139955, pvalue=1.1304715951095853e-15) SpearmanrResult(correlation=-0.33128451575784, pvalue=8.242695524547087e-16)
# Pearson Correlation: (-0.32746672398007093, 1.8313369210425085e-15) (-0.32890059943905126, 1.3587114711255517e-15)


In [26]:
### XLM-RoBerta

## Non hidden layers
# German
# Average WMD: 4.548243439785226
# Average WMDo: 4.575434147886658
# Spearman Correlation: SpearmanrResult(correlation=-0.10094631770418366, pvalue=0.01686696635603566) SpearmanrResult(correlation=-0.10453465822822869, pvalue=0.01332368619741844)
# Pearson Correlation: (-0.11512082287785595, 0.006386634715818683) (-0.11593462973560906, 0.006020716138341932)

## last hidden layer
# German
# Average WMD: 3.1401635565505166
# Average WMDo: 3.167354264651937
# Spearman Correlation: SpearmanrResult(correlation=-0.1841105542883929, pvalue=1.1617141256155387e-05) SpearmanrResult(correlation=-0.18227333541971025, pvalue=1.4238788274399358e-05)
# Pearson Correlation: (-0.1980735309589291, 2.316129292293194e-06) (-0.1945171215143755, 3.531693640227786e-06)

## Last Four hidden layers
# German
# Average WMD: 8.578276218217889
# Average WMDo: 8.605466926319314
# Spearman Correlation: SpearmanrResult(correlation=-0.3304421980208574, pvalue=9.839671409389784e-16) SpearmanrResult(correlation=-0.3332079001709645, pvalue=5.489755425724e-16)
# Pearson Correlation: (-0.3433748475062588, 6.10447414514315e-17) (-0.3456032759711427, 3.7310746037515345e-17)

## Last Six hidden layers
# German
# Average WMD: 9.094585309319006
# Average WMDo: 9.121776017420439
# Spearman Correlation: SpearmanrResult(correlation=-0.3036259226409706, pvalue=2.087032508340178e-13) SpearmanrResult(correlation=-0.30626733048073684, pvalue=1.2609214791847759e-13)
# Pearson Correlation: (-0.31417814358164664, 2.703520305731303e-14) (-0.3162422053465792, 1.7952277506910935e-14)

## Last number 8-10 Layers [-5:-3]
# German
# Average WMD: 11.240478491699427
# Average WMDo: 11.267669199800848
# Spearman Correlation: SpearmanrResult(correlation=-0.3249012273636077, pvalue=3.1118292366112528e-15) SpearmanrResult(correlation=-0.3273563645656669, pvalue=1.8737764667209278e-15)
# Pearson Correlation: (-0.33702823695287076, 2.427964862381532e-16) (-0.33909284357952296, 1.554896139592844e-16)


In [27]:
# Test Ref - MT

In [28]:
# Reference-vs-MT scoring (cross_lingual=False) of the ru-en samples, using
# the hooked layer outputs as embeddings (embed_type=2).
wmd, wmdo, score = compute_WMD_WMDo(collections["ruen"], bert_tokenizer, bert_model, embed_type=2, cross_lingual=False)

0
100
200
300
400
500


In [29]:
# The scores reported here were computed from collections["ruen"], so the
# label names that language pair (it previously said "German").
print("ru-en")
print(f"Average WMD: {sum(wmd)/len(wmd)}")
print(f"Average WMDo: {sum(wmdo)/len(wmdo)}")
_,_,_,_ = evaluation(wmd, wmdo, score)

German
Average WMD: 11.470880748845056
Average WMDo: 11.476147504216765
Spearman Correlation: SpearmanrResult(correlation=-0.6574820982280734, pvalue=1.2953695967988928e-70) SpearmanrResult(correlation=-0.657612360279939, pvalue=1.1905310849060568e-70)
Pearson Correlation: (-0.6515631703753917, 5.73846456810036e-69) (-0.6516880981397738, 5.301845335362458e-69)


In [30]:
## Last four layers
# German
# Average WMD: 12.152743508204132
# Average WMDo: 12.164223020759186
# Spearman Correlation: SpearmanrResult(correlation=-0.5986864708478936, pvalue=9.371146594366734e-56) SpearmanrResult(correlation=-0.5975402162816665, pvalue=1.7033444717680657e-55)
# Pearson Correlation: (-0.5999878379668633, 4.7412586471769775e-56) (-0.599349956080672, 6.623719091764433e-56)

## Non hidden layers
# German
# Average WMD: 0.5559826012993426
# Average WMDo: 0.567462113854396
# Spearman Correlation: SpearmanrResult(correlation=-0.4971708182378498, pvalue=2.6706184671414167e-36) SpearmanrResult(correlation=-0.5072228728863477, pvalue=6.053904855831126e-38)
# Pearson Correlation: (-0.5138784581813165, 4.59877547363914e-39) (-0.5200391946509815, 4.01995249513834e-40)


## Source to Machine Translation

In [32]:
# de-en
# Average WMD: 15.908297730517315
# Average WMDo: 15.93548843861872
# Spearman Correlation: SpearmanrResult(correlation=-0.3359035629213468, pvalue=3.090735442641185e-16) SpearmanrResult(correlation=-0.3366638149811885, pvalue=2.6257508005501394e-16)
# Pearson Correlation: (-0.32847915287528817, 1.4835516049568652e-15) (-0.3297559748812626, 1.1362222688075229e-15)

# zh-en
# Average WMD: 18.526867907578072
# Average WMDo: 18.624356002816167
# Spearman Correlation: SpearmanrResult(correlation=-0.4906607072817744, pvalue=2.903517560982787e-35) SpearmanrResult(correlation=-0.4904195667533562, pvalue=3.168722649602164e-35)
# Pearson Correlation: (-0.4367180311484281, 1.7545618407628865e-27) (-0.43622472132132417, 2.0375547905927388e-27)

# lv-en
# Average WMD: 19.45333163866935
# Average WMDo: 19.547121179485675
# Spearman Correlation: SpearmanrResult(correlation=-0.07554353990742502, pvalue=0.0740595906766064) SpearmanrResult(correlation=-0.07400342475581874, pvalue=0.08016536079430812)
# Pearson Correlation: (-0.05155513853432033, 0.2231854593405953) (-0.05000084606885026, 0.23747121082738024)

# en-zh
# Average WMD: 18.267354630702208
# Average WMDo: 18.365926059273626
# Spearman Correlation: SpearmanrResult(correlation=-0.44545133389182645, pvalue=1.193123737108207e-28) SpearmanrResult(correlation=-0.44517821439846067, pvalue=1.2993069633404653e-28)
# Pearson Correlation: (-0.45827881960369976, 1.992507249622736e-30) (-0.4580581015233263, 2.1410331158015858e-30)

# fi-en
# Average WMD: 18.941786521311673
# Average WMDo: 19.029337825166568
# Spearman Correlation: SpearmanrResult(correlation=-0.16754080392931286, pvalue=6.774351638781072e-05) SpearmanrResult(correlation=-0.16692883040352263, pvalue=7.207975901097237e-05)
# Pearson Correlation: (-0.11723068129768785, 0.005476728966044557) (-0.11686178519562548, 0.005626873352456735)

# cs-en
# Average WMD: 18.230574645529135
# Average WMDo: 18.319016113783107
# Spearman Correlation: SpearmanrResult(correlation=-0.19161508805831654, pvalue=4.95483614045432e-06) SpearmanrResult(correlation=-0.19051570864156364, pvalue=5.625477261207631e-06)
# Pearson Correlation: (-0.17953271800733833, 1.9216993852820625e-05) (-0.17881143957727652, 2.0779447444770185e-05)

# lv-en
# Average WMD: 19.45333163866935
# Average WMDo: 19.547121179485675
# Spearman Correlation: SpearmanrResult(correlation=-0.07554353990742502, pvalue=0.0740595906766064) SpearmanrResult(correlation=-0.07400342475581874, pvalue=0.08016536079430812)
# Pearson Correlation: (-0.05155513853432033, 0.2231854593405953) (-0.05000084606885026, 0.23747121082738024)

# ru-en
# Average WMD: 17.480736122245396
# Average WMDo: 17.575593265102583
# Spearman Correlation: SpearmanrResult(correlation=-0.19024771398232415, pvalue=5.8016367858973835e-06) SpearmanrResult(correlation=-0.189810804790285, pvalue=6.100168768633355e-06)
# Pearson Correlation: (-0.16099562256029112, 0.0001300695184481004) (-0.16042567113071327, 0.00013751111203564234)

# en-ru
# Average WMD: 17.47576842490002
# Average WMDo: 17.572411282042886
# Spearman Correlation: SpearmanrResult(correlation=-0.39878138277591085, pvalue=8.668344369037924e-23) SpearmanrResult(correlation=-0.3976187287404798, pvalue=1.181716803338035e-22)
# Pearson Correlation: (-0.3832027466749461, 4.983027094441086e-21) (-0.3813803711216598, 7.893025247716566e-21)


## Reference to Machine Translation

In [33]:
# de-en
# Average WMD: 12.15274346853989
# Average WMDo: 12.164222981094943
# Spearman Correlation: SpearmanrResult(correlation=-0.5986864708478936, pvalue=9.371146594366734e-56) SpearmanrResult(correlation=-0.5975402162816665, pvalue=1.7033444717680657e-55)
# Pearson Correlation: (-0.5999878543167821, 4.741217968929527e-56) (-0.5993499722107763, 6.623663152618938e-56)

# zh-en
# Average WMD: 13.77200166588928
# Average WMDo: 13.806597245244305
# Spearman Correlation: SpearmanrResult(correlation=-0.6950206428318605, pvalue=5.456088264576756e-82) SpearmanrResult(correlation=-0.6954839961679907, pvalue=3.850190278505142e-82)
# Pearson Correlation: (-0.7087166667851119, 1.3683539826421444e-86) (-0.7085607964096223, 1.5490357551779024e-86)

# lv-en
# Average WMD: 14.330035314128063
# Average WMDo: 14.367372335364394
# Spearman Correlation: SpearmanrResult(correlation=-0.6186720939798916, pvalue=1.8765479508817297e-60) SpearmanrResult(correlation=-0.6162930776473869, pvalue=7.085872047167209e-60)
# Pearson Correlation: (-0.6395109578606819, 9.99194452340967e-66) (-0.6383365518704406, 2.03103488095823e-65)

# en-zh
# Average WMD: 13.61301603685046
# Average WMDo: 13.712539846374266
# Spearman Correlation: SpearmanrResult(correlation=-0.7736187205407453, pvalue=1.1800453111953126e-112) SpearmanrResult(correlation=-0.773656029333184, pvalue=1.1335896724174616e-112)
# Pearson Correlation: (-0.7749136943293855, 2.9140838452432793e-113) (-0.7749182816485176, 2.8996340124338556e-113)

# fi-en
# Average WMD: 14.155241639622409
# Average WMDo: 14.19069215574797
# Spearman Correlation: SpearmanrResult(correlation=-0.7633166944762979, pvalue=5.808936367743332e-108) SpearmanrResult(correlation=-0.7628387133188071, pvalue=9.461707470363626e-108)
# Pearson Correlation: (-0.7679809030320991, 4.676311604162688e-110) (-0.7680209749904956, 4.4843394601638855e-110)

# cs-en
# Average WMD: 12.729601912847894
# Average WMDo: 12.748601910876483
# Spearman Correlation: SpearmanrResult(correlation=-0.6139748442349198, pvalue=2.5583272054944845e-59) SpearmanrResult(correlation=-0.6137145080442491, pvalue=2.9531188505407143e-59)
# Pearson Correlation: (-0.6215747717519267, 3.65234443387317e-61) (-0.6215623193615742, 3.6782135669184304e-61)

# ru-en
# Average WMD: 11.470880748845056
# Average WMDo: 11.476147504216765
# Spearman Correlation: SpearmanrResult(correlation=-0.6574820982280734, pvalue=1.2953695967988928e-70) SpearmanrResult(correlation=-0.657612360279939, pvalue=1.1905310849060568e-70)
# Pearson Correlation: (-0.6515631703753917, 5.73846456810036e-69) (-0.6516880981397738, 5.301845335362458e-69)

# en-ru
# Average WMD: 12.328497785693296
# Average WMDo: 12.345642483775569
# Spearman Correlation: SpearmanrResult(correlation=-0.6455143351860179, pvalue=2.533030628763656e-67) SpearmanrResult(correlation=-0.6445313236694906, pvalue=4.6497826244462814e-67)
# Pearson Correlation: (-0.6436741561101775, 7.882491445638172e-67) (-0.6430047453014485, 1.1889749925967755e-66)
