# Analysis Translation

In [17]:
import os 
import json
import math
from tqdm import tqdm
import scipy.stats as stats
import pandas as pd
import seaborn as sns
from matplotlib import rcParams

# figure size in inches
rcParams['figure.figsize'] = 20.7,8.27

In [18]:

def split_div(string_key):
    name, type_, alpha, beta, temp, tf_idf = '', '', -1, -1, -1, False
    try:
        if string_key[0] == 'r':
            type_ = 'r'
        elif 'sim' in string_key:
            type_ = 'sim'
        else:
            type_ = 'normal'
        if 'True' in string_key:
            tf_idf = True
        splitted_key = string_key.split('_')
        temp = splitted_key[-2]
        temp = float(temp)

        if 'alpha_beta' in string_key:
            name = 'alpha_beta'
            try:
                alpha = float(splitted_key[-4])
                beta = float(splitted_key[-3])
            except:
                alpha = float(splitted_key[-5])
                beta = float(splitted_key[-4])
        elif 'alpha_div_real_' in string_key:
            name = 'alpha_div_real'
        elif 'alpha' in string_key:
            name = 'alpha'
            try:
                alpha = float(splitted_key[-3])
            except:
                beta = float(splitted_key[-4])
        elif 'beta' in string_key:
            name = 'beta'
            try:
                beta = float(splitted_key[-3])
            except:
                beta = float(splitted_key[-4])
        elif 'rao' in string_key:
            name = 'rao'
        elif 'js' in string_key:
            name = 'js'
        elif 'kl' in string_key:
            name = 'kl'
        elif 'l1' in string_key:
            name = 'l1'
        elif 'l2' in string_key:
            name = 'l2'
        elif 'l_infini' in string_key:
            name = 'l_infini'
        elif 'lp_0.5' in string_key:
            name = 'lp_0.5'
        elif 'lp_1' in string_key:
            name = 'lp_1'
        elif 'lp_2' in string_key:
            name = 'lp_2'
        elif 'lp_3' in string_key:
            name = 'lp_3'
        elif 'lp_inf' in string_key:
            name = 'lp_inf'

        else:
            raise NotImplementedError
    except:
        if string_key in ['human','utt_level', 'DataCoverage', 'Fluency', 'Relevance', 'TextStructure', 'litepyramid_recall',
                          'responsiveness', 'js-2', 'mover_score', 'meteor', 'bleu','chrf',
                          'rouge_we_3_p', 'rouge_we_3_r', 'rouge_we_3_f']:
            name = string_key
            temp = 1
            type_ = 'normal'
            tf_idf = True
        elif 'bert' in string_key or 'rouge' in string_key:
            name = string_key
            temp = 1
            type_ = 'normal'
            tf_idf = True
        else:
            name = None
    assert name != ''
    assert type_ != ''
    assert temp != -1
    return name, type_, alpha, beta, temp, tf_idf

import ast



def parse_depths(string_key):
    "bert-base-uncased_[11]_depth_2_0.3_10_projection"
    splited = string_key.split('_')
    print(splited)
    tf_idf = ""
    temp = splited[-4]
    beta = splited[-3]
    alpha = splited[-2]
    type_ = ast.literal_eval(splited[-6])[0]
    name  = splited[-1]
    return name ,type_, alpha, beta, temp, tf_idf  


def split_key_wasserstein(string_key):
    if "depth" in string_key:
        return parse_depths(string_key)
    
    if 'soft_matching' in string_key:
        strings = ['classic', 'sakoechiba', 'itakura']
        if 'classic_ot' in string_key:
            type_ = 'classic_ot'
        elif 'classic' in string_key:
            type_ = 'classic'
        elif 'sakoechiba' in string_key:
            type_ = 'sakoechiba'
        elif 'itakura' in string_key:
            type_ = 'itakura'
        else:
            raise NotImplementedError
        new_string = string_key.replace(type_, '').replace('soft_matching_', 'w_div_') + '_True'
        name = None
    elif 'hard_scores' in string_key:
        strings = ['precision', 'recall', 'f1', 'precision_w', 'recall_w', 'f1_w']
        if 'precision_w' in string_key:
            type_ = 'precision_w'
        elif 'recall_w' in string_key:
            type_ = 'recall_w'
        elif 'f1_w' in string_key:
            type_ = 'f1_w'
        elif 'precision' in string_key:
            type_ = 'precision'
        elif 'recall' in string_key:
            type_ = 'recall'
        elif 'f1' in string_key:
            type_ = 'f1'
        else:
            raise NotImplementedError
        new_string = string_key.replace(type_ + '_', '').replace('hard_scores_', 'w_div_') + '_True'
    else:
        raise NotImplementedError
    name, new_type_, alpha, beta, temp, tf_idf = split_div(new_string)
    print('Type', type_)
    type_ = '{}_{}'.format(new_type_, type_)
    return name, type_, alpha, beta, temp, tf_idf

def split_key_ot(string_key):

    if 'wasserstein_bar' in string_key:
        return string_key,'','','','',''  
        
    if 'ot' == string_key[:2] or 'py' ==string_key[:2] :
        name = string_key.split('_')[0]
        type_ = string_key.split('_')[1]
        
        alpha, beta, temp, tf_idf  = '','','',''
        return name ,type_, alpha, beta, temp, tf_idf 
    "newot_linfinity_0.5_10"
    "newot_linfinity_10_500"
    if string_key in ['human','utt_level', 'DataCoverage', 'Fluency', 'Relevance', 'TextStructure', 'litepyramid_recall',
                          'responsiveness', 'js-2', 'mover_score', 'meteor', 'bleu',
                          'rouge_we_3_p', 'rouge_we_3_r', 'rouge_we_3_f']:
        return string_key ,'', '', '', '', 10
    else :
        name, type_ = string_key.split('_')[1] , string_key.split('_')[0]
        alpha, beta, temp, tf_idf = 0,0,string_key.split('_')[2],string_key.split('_')[-1]
        try :
            tf_idf = float(tf_idf)
        except : 
            tf_idf= '100'
        return name ,type_, alpha, beta, temp, tf_idf  


In [45]:
year = '15'
index = 2
data_type = ['cs-en', 'de-en', 'ru-en', 'fi-en', 'ro-en', 'tr-en'][index]
file_path = '{}_{}_formated.json'.format(year,data_type)
print('Loading {} {}'.format(year,data_type))

Loading 15 ru-en


In [46]:
with open(file_path,'r') as file :
    all_data = json.load(file)

# Reproducing Correlation Computation Instance Level Correlation

In [47]:
final_correlations_spearman_lit = {}
final_correlations_pearson_lit = {}
final_correlations_kendall_lit = {}
ids = list(all_data.keys())[10]
sys = list(all_data[ids]['system'].keys())[0]
for metric_name,_ in tqdm(all_data[ids]['system'][sys]['scores'].items()):
        predicted = []
        utt_golden_scores = []
        for key_data,value_data in all_data.items():
                for key_system, value_system in value_data['system'].items():
                    try :
                        predicted.append(sum(sum(value_system['scores'][metric_name],[]))/len(sum(value_system['scores'][metric_name],[])))
                    except :
                        try :
                            predicted.append(sum(value_system['scores'][metric_name]))
                        except :
                            predicted.append(value_system['scores'][metric_name])
                    utt_golden_scores.append(value_system['scores']['human'])
        predicted_score = [0 if math.isnan(x) else x for x in predicted]
        predicted_score = [0 if math.isinf(x) else x for x in predicted_score]
        predicted_score = [0 if math.isinf(-x) else x for x in predicted_score]
        if len(predicted_score) > 0 and len(utt_golden_scores) > 0:
                final_correlations_spearman_lit[metric_name] =  abs(stats.spearmanr(predicted_score, utt_golden_scores)[0])
                final_correlations_pearson_lit[metric_name] = abs(stats.pearsonr(predicted_score, utt_golden_scores)[0])
                final_correlations_kendall_lit[metric_name] = abs(stats.kendalltau(predicted_score, utt_golden_scores)[0])


100%|██████████| 68/68 [00:00<00:00, 392.05it/s]


In [48]:
names, types,alphas,betas,temps,tf_idfs,pearson_lit,spearman_lit,kendall_lit = [],[],[],[],[],[],[],[],[]
for k,value in tqdm(final_correlations_spearman_lit.items()):
    if 'meteor' or 'chrf' in k :
            name= k 
            tf_idf,temp,alpha,beta,type_ = 1,1,1,1,1
    else :
            if k[0] in ['2','3','4']:
                k_ = k[2:]
                add = True
            else :
                k_ = k
            name, type_, alpha, beta, temp, tf_idf = split_div(k_)
            if add :
                name += k[0]
    names.append(name)
    tf_idfs.append(tf_idf)
    temps.append(temp)
    alphas.append(alpha)
    betas.append(beta)
    types.append(type_)
    kendall_lit.append(final_correlations_kendall_lit[k])
    
    pearson_lit.append(final_correlations_pearson_lit[k])
    spearman_lit.append(final_correlations_spearman_lit[k])


df_dict = {'names': names, 'types' : types,'betas':betas,'alphas':alphas,
           'temps' : temps,'pearson' : pearson_lit, 'kendall': kendall_lit,
           'spearman':spearman_lit}
df = pd.DataFrame(df_dict)
df.to_csv('translation_baryscore_{}_{}.csv'.format(data_type,year))

100%|██████████| 68/68 [00:00<00:00, 135428.62it/s]


# Reproducing Correlation Scores

In [49]:
summ_level = pd.read_csv('translation_baryscore_{}_{}.csv'.format(data_type,year))

In [50]:
summ_level.sort_values('pearson',ascending =False).head(40)

Unnamed: 0.1,Unnamed: 0,names,types,betas,alphas,temps,pearson,kendall,spearman
0,0,human,1,1,1,1,1.0,1.0,1.0
50,50,bert-base-mnli_idf_wsw_nbarycentersTrue_range(...,1,1,1,1,0.775465,0.56117,0.747443
51,51,bert-base-mnli_none_wsw_nbarycentersTrue_range...,1,1,1,1,0.773254,0.558092,0.748553
42,42,bert-base-mnli_idf_wsw_nbarycentersTrue_range(...,1,1,1,1,0.770709,0.555591,0.742595
43,43,bert-base-mnli_none_wsw_nbarycentersTrue_range...,1,1,1,1,0.76883,0.549723,0.738109
53,53,bert-base-mnli_none_wstopw_nbarycentersTrue_ra...,1,1,1,1,0.742405,0.528742,0.712813
41,41,bert_score_f1,1,1,1,1,0.740368,0.525669,0.705098
52,52,bert-base-mnli_idf_wstopw_nbarycentersTrue_ran...,1,1,1,1,0.740005,0.52677,0.712132
3,3,"roberta-base_idf_wsw_nbarycentersTrue_range(8,...",1,1,1,1,0.739696,0.543776,0.728865
45,45,bert-base-mnli_none_wstopw_nbarycentersTrue_ra...,1,1,1,1,0.739317,0.524938,0.71063
