# Analysis Translation

In [1]:
import os 
import json
import math
from tqdm.notebook import tqdm
import scipy.stats as stats
import pandas as pd
import seaborn as sns
from matplotlib import rcParams
import sys
sys.path.append('../')
from bary_score import BaryScoreMetric
# Default matplotlib figure size as (width, height), in inches.
rcParams['figure.figsize'] = (20.7, 8.27)

In [2]:

def split_div(string_key):
    """Map a metric key to the descriptor tuple used when tabulating results.

    Parameters
    ----------
    string_key : str
        Name of a score entry. Recognised keys are the fixed names listed in
        ``known_keys`` below, plus any key containing ``'new_score'``,
        ``'bert'`` or ``'rouge'``.

    Returns
    -------
    tuple
        ``(name, type_, alpha, beta, temp, tf_idf)`` where ``name`` is the key
        itself, ``type_`` is ``'normal'``, ``alpha`` and ``beta`` are the
        placeholder ``-1``, ``temp`` is ``1`` and ``tf_idf`` is ``True``.

    Raises
    ------
    AssertionError
        If ``string_key`` does not belong to any recognised metric family.
    """
    known_keys = ('human', 'utt_level', 'DataCoverage', 'Fluency', 'Relevance',
                  'TextStructure', 'litepyramid_recall', 'responsiveness',
                  'js-2', 'mover_score', 'meteor', 'bleu', 'chrf',
                  'rouge_we_3_p', 'rouge_we_3_r', 'rouge_we_3_f')
    recognised = (
        string_key in known_keys
        or 'new_score' in string_key
        or 'bert' in string_key
        or 'rouge' in string_key
    )
    if not recognised:
        # Name the offending key in the error; previously this path printed an
        # empty string and then failed on an unrelated, message-less assertion.
        raise AssertionError('unrecognised metric key: {!r}'.format(string_key))
    # 'new_score' keys used to leave type_/temp unset and therefore always
    # tripped the sanity assertions; they now get the same descriptor values
    # as every other recognised family.
    return string_key, 'normal', -1, -1, 1, True
  


In [3]:
# Experiment selection: the year suffix and the language pair to analyse.
year = '15'
index = 1
# Language pairs this notebook can be pointed at; `index` picks one of them.
language_pairs = ['cs-en', 'de-en', 'ru-en', 'fi-en', 'ro-en', 'tr-en']
data_type = language_pairs[index]
# Name of the scores file, built from the year and the language pair.
file_path = '{}_{}_formated.json'.format(year, data_type)
print('Loading {} {}'.format(year, data_type))

Loading 15 de-en


In [4]:
# Load the scores file into `all_data`. Later cells read it as a dict whose
# values hold 'references_sentences' and a 'system' mapping with per-system
# 'generated_sentence' and 'scores' entries.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open(file_path, 'r', encoding='utf-8') as handle:
    all_data = json.load(handle)

# Computing New BaryScore

In [None]:
# Build the metric with IDF weighting switched on.
metric = BaryScoreMetric(use_idfs=True)

# Every entry stores its system output under the same 'wmt<year>' key.
system_key = 'wmt{}'.format(year)

# Gather all references and system outputs so the IDF statistics are prepared
# over the complete data set before any scoring happens.
refs, sentences = [], []
for entry in tqdm(all_data.values()):
    refs.append(entry['references_sentences'])
    sentences.append(entry['system'][system_key]['generated_sentence'])

a = metric.prepare_idfs(refs, sentences)

# Score each entry on its own and store the results next to the existing
# scores, prefixing every returned score name with 'new_score_'.
for entry in tqdm(all_data.values()):
    system_entry = entry['system'][system_key]
    batch_scores = metric.evaluate_batch(entry['references_sentences'],
                                         system_entry['generated_sentence'])
    for score_name, score_value in batch_scores.items():
        system_entry['scores']['new_score_{}'.format(score_name)] = score_value

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))

BaryScore Progress: 100%|██████████| 1/1 [00:03<00:00,  3.15s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.69it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.15it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.34it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.06it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  5.34it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.76it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.85it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00, 47.28it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  4.16it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.17it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.91it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  5.41it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:01<00:00,  1.17s/it]
BaryScore 

BaryScore Progress: 100%|██████████| 1/1 [00:01<00:00,  1.03s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  4.24it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.87it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  5.06it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.30it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  5.24it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:02<00:00,  2.70s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.66it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:02<00:00,  2.98s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  6.59it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.29it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.28it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.82it/s]
BaryScore 

BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.53it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.90it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  4.25it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  4.00it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:01<00:00,  1.19s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.42it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.44it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.75it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.83it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.72it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.36it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.21it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.11it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00, 59.40it/s]
BaryScore 

BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.35it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.74it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.78it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.06it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.94it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.44it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.83it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:03<00:00,  3.94s/it]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.21it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  2.26it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  3.54it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:00<00:00,  1.99it/s]
BaryScore Progress: 100%|██████████| 1/1 [00:03<00:00,  3.47s/it]
BaryScore 

# Reproducing the Instance-Level Correlation Computation

In [None]:
def _collapse_score(raw_score):
    """Reduce one stored metric value to a single number.

    * list of lists -> mean over all inner values
    * flat list     -> sum of its values
    * anything else -> returned unchanged

    Only the errors that signal "wrong shape for this strategy" are caught, so
    unrelated failures (and keyboard interrupts) are no longer swallowed.
    """
    try:
        flat = sum(raw_score, [])  # flatten once; fails unless items are lists
        return sum(flat) / len(flat)
    except (TypeError, ZeroDivisionError):
        pass
    try:
        return sum(raw_score)
    except TypeError:
        return raw_score


# Absolute correlation with the human score, keyed by metric name.
final_correlations_spearman_lit = {}
final_correlations_pearson_lit = {}
final_correlations_kendall_lit = {}

# One entry (index 10) and its first system are used only to enumerate the
# available metric names.
# NOTE(review): presumably every entry carries the same score names - confirm.
ids = list(all_data.keys())[10]
# Deliberately not named `sys`: that would clobber the imported `sys` module.
first_system = list(all_data[ids]['system'].keys())[0]
metric_names = list(all_data[ids]['system'][first_system]['scores'])

for metric_name in tqdm(metric_names):
    print(metric_name)
    predicted = []
    utt_golden_scores = []
    for value_data in all_data.values():
        for value_system in value_data['system'].values():
            system_scores = value_system['scores']
            predicted.append(_collapse_score(system_scores[metric_name]))
            utt_golden_scores.append(system_scores['human'])
    # NaN and +/-inf would poison the correlation statistics; map them to 0.
    predicted_score = [x if math.isfinite(x) else 0 for x in predicted]
    if predicted_score and utt_golden_scores:
        final_correlations_spearman_lit[metric_name] = abs(
            stats.spearmanr(predicted_score, utt_golden_scores)[0])
        final_correlations_pearson_lit[metric_name] = abs(
            stats.pearsonr(predicted_score, utt_golden_scores)[0])
        final_correlations_kendall_lit[metric_name] = abs(
            stats.kendalltau(predicted_score, utt_golden_scores)[0])


In [None]:
# Column accumulators for the per-metric results table.
names, types, alphas, betas, temps, tf_idfs = [], [], [], [], [], []
pearson_lit, spearman_lit, kendall_lit = [], [], []

for k in tqdm(list(final_correlations_spearman_lit)):
    print(k)
    # Each substring must be tested on its own: `'meteor' or 'chrf' in k` is
    # always truthy, which sent every key down this branch.
    if 'meteor' in k or 'chrf' in k:
        name = k
        tf_idf, temp, alpha, beta, type_ = 1, 1, 1, 1, 1
    else:
        # A leading '2', '3' or '4' is stripped (with its separator) before
        # parsing and re-appended to the parsed name afterwards. `add` is
        # recomputed for every key so it is never unbound or stale.
        add = k[:1] in ('2', '3', '4')
        k_ = k[2:] if add else k
        try:
            name, type_, alpha, beta, temp, tf_idf = split_div(k_)
        except AssertionError:
            # Keys split_div rejects keep the placeholder descriptors instead
            # of aborting the whole table.
            name = k
            tf_idf, temp, alpha, beta, type_ = 1, 1, 1, 1, 1
        else:
            if add:
                name += k[0]
    names.append(name)
    tf_idfs.append(tf_idf)
    temps.append(temp)
    alphas.append(alpha)
    betas.append(beta)
    types.append(type_)
    kendall_lit.append(final_correlations_kendall_lit[k])
    pearson_lit.append(final_correlations_pearson_lit[k])
    spearman_lit.append(final_correlations_spearman_lit[k])


# `tf_idfs` is collected above but is not part of the exported table.
df_dict = {'names': names, 'types': types, 'betas': betas, 'alphas': alphas,
           'temps': temps, 'pearson': pearson_lit, 'kendall': kendall_lit,
           'spearman': spearman_lit}
df = pd.DataFrame(df_dict)
df.to_csv('translation_baryscore_{}_{}.csv'.format(data_type, year))

# Reproducing Correlation Scores

In [None]:
# The results CSV was written with its row index as the first, unnamed column;
# read that column back as the index instead of as an 'Unnamed: 0' data column.
summ_level = pd.read_csv('translation_baryscore_{}_{}.csv'.format(data_type, year), index_col=0)

In [None]:
# Rank the metrics by Pearson correlation, highest first, and show the top 60.
summ_level.sort_values(by='pearson', ascending=False).head(n=60)