In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import spearmanr, kendalltau

from utils import read_data_into_dataframe, qualities

# Apply seaborn's default styling globally, so it affects every figure drawn below.
# NOTE(review): color_codes=True is documented to remap matplotlib's one-letter
# color codes to the seaborn palette; none of the visible cells use such codes.
sns.set(color_codes=True)

# Read Data

In [None]:
df = read_data_into_dataframe()

# Split every language tag on "^": the first piece becomes "from" and the last
# piece becomes "to". A tag without "^" yields the same value for both.
language_parts = df["language"].map(lambda tag: tag.split("^"))
df["from"] = language_parts.map(lambda parts: parts[0])
df["to"] = language_parts.map(lambda parts: parts[-1])

# From here on "language" holds only the last piece of the original tag.
df["language"] = df["to"]

In [None]:
# Display label for each automatic metric, keyed by its `submetric` id.
# The insertion order is also the order in which the metrics are processed.
renamer = dict(
    bertscores_F="BERTScore",
    rougeL="ROUGE-L",
    rouge1="ROUGE-1",
    rouge2="ROUGE-2",
    bleu="BLEU",
    jshannon="JS",
    blanc="BLANC",
    estime="ESTIME",
)

# Ids of the automatic metrics, in the same order as the labels above.
auto_measures = list(renamer)

# Column order for the per-language heatmaps: the metric label followed by one
# "EN-<LANG>" column per language code.
figure1_columns = ["Metric"] + [
    f"EN-{code}" for code in ("DE", "FR", "ES", "IT", "AF", "HI", "RU")
]

# Metrics whose correlation with the expert scores is multiplied by -1.
negative_corrs = ["jshannon", "estime"]
# Distinct language codes other than "en", in order of first appearance.
is_not_english = df["language"] != "en"
non_en = df.loc[is_not_english, "language"].unique().tolist()

# Figure 1: Correlation of automatic metrics with expert scores (English)

In [None]:
# Correlate every automatic metric with the expert scores of each quality
# dimension, using only the rows whose language is "en".
# NOTE(review): spearmanr/kendalltau pair the two series by position, not by
# index label, so this assumes both selections come back in the same row
# order -- confirm against read_data_into_dataframe().

# The expert scores depend only on the quality, so select them once per quality
# rather than once per (metric, quality) pair.
expert_by_quality = {
    qual: df.loc[
        (df.submetric == qual) & (df.metric == "experts") & (df.language == "en"),
        "value",
    ]
    for qual in qualities
}

# One record per (metric, quality) pair; this is a list despite the name.
table2_dict = []
for met in auto_measures:
    # Both the metric's scores and its sign depend only on the metric.
    other_met = df.loc[(df.submetric == met) & (df.language == "en"), "value"]
    # Coefficients of the metrics listed in negative_corrs are negated
    # (presumably because lower is better for them -- TODO confirm).
    sign_factor = -1 if met in negative_corrs else 1

    for qual in qualities:
        expert = expert_by_quality[qual]
        sp_corr, sp_p = spearmanr(expert, other_met)
        kt_corr = kendalltau(expert, other_met).correlation

        table2_dict.append(
            dict(
                submetric=met,
                quality=qual[:3],  # first three characters serve as the column label
                spearman_corr=sp_corr * sign_factor,
                spearman_p=sp_p,  # the p-value is not affected by the sign flip
                kendall_corr=kt_corr * sign_factor,
            )
        )

tbl2_df = pd.DataFrame(table2_dict)
# Human-readable metric label used as the heatmap row name.
tbl2_df['Metric'] = tbl2_df.submetric.apply(renamer.get)

In [None]:
# Metric-by-quality grid of Spearman coefficients, rounded for display.
table2_sp = (
    tbl2_df
    .pivot(index="Metric", columns="quality", values="spearman_corr")
    .round(2)
    .reset_index()
    .sort_values("Metric")
)

label_size = 16
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    table2_sp.set_index("Metric"),
    ax=ax,
    cmap="YlGnBu",
    annot=True,
    fmt="0.2f",
    annot_kws={"fontsize": label_size},
)

# Enlarge the colorbar and axis tick labels to match the cell annotations.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=label_size)
plt.setp(ax.get_xticklabels(), size=label_size)
plt.setp(ax.get_yticklabels(), size=label_size)

ax.set_xlabel("Quality", size=label_size)
ax.set_ylabel("Metric", size=label_size)
ax.set_title(r"Spearman's $\rho$ Correlation", size=18)
fig.savefig("table2_spearman.pdf", bbox_inches="tight", format="pdf")

In [None]:
# Metric-by-quality grid of Kendall coefficients, rounded for display.
table2_kt = (
    tbl2_df
    .pivot(index="Metric", columns="quality", values="kendall_corr")
    .round(2)
    .reset_index()
    .sort_values("Metric")
)

label_size = 16
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    table2_kt.set_index("Metric"),
    ax=ax,
    cmap="YlGnBu",
    annot=True,
    fmt="0.2f",
    annot_kws={"fontsize": label_size},
)

# Enlarge the colorbar and axis tick labels to match the cell annotations.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=label_size)
plt.setp(ax.get_xticklabels(), size=label_size)
plt.setp(ax.get_yticklabels(), size=label_size)

ax.set_xlabel("Quality", size=label_size)
# The y-axis label is a single space, i.e. visually blank.
ax.set_ylabel(" ", size=label_size)
ax.set_title(r"Kendall's $\tau$ Correlation", size=18)
fig.savefig("table2_kendall.pdf", bbox_inches="tight", format="pdf")

# Figure 2: Correlation of each automatic metric between English and the other languages

In [None]:
# For every automatic metric, correlate its scores on the "en" rows with its
# scores on the rows of each other language.
# NOTE(review): the two series are paired by position, so both selections are
# assumed to come back in the same row order -- confirm.
table1_dict = []
for met in auto_measures:
    is_metric = df.submetric == met
    english = df.loc[is_metric & (df.language == "en"), "value"]

    for lang in non_en:
        other_lang = df.loc[is_metric & (df.language == lang), "value"]

        rho, rho_p = spearmanr(english, other_lang)
        tau = kendalltau(english, other_lang).correlation

        table1_dict.append(
            {
                "submetric": met,
                "language": f"EN-{lang.upper()}",
                "spearman_corr": rho,
                "spearman_p": rho_p,
                "kendall_corr": tau,
            }
        )

tbl1_df = pd.DataFrame(table1_dict).rename(columns={"language": "Language"})
# Human-readable metric label used as the heatmap row name.
tbl1_df["Metric"] = tbl1_df["submetric"].map(renamer.get)

In [None]:
# Metric-by-language grid of Spearman coefficients, rounded for display and
# restricted to (and ordered by) the columns listed in figure1_columns.
table1_sp = (
    tbl1_df
    .pivot(index="Metric", columns="Language", values="spearman_corr")
    .round(2)
    .reset_index()
    .sort_values("Metric")
    [figure1_columns]
)

label_size = 16
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    table1_sp.set_index("Metric"),
    ax=ax,
    cmap="YlGnBu",
    annot=True,
    fmt="0.2f",
    annot_kws={"fontsize": label_size},
)

# Enlarge the colorbar and axis tick labels to match the cell annotations.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=label_size)
plt.setp(ax.get_xticklabels(), size=label_size)
plt.setp(ax.get_yticklabels(), size=label_size)

ax.set_xlabel("Language", size=label_size)
ax.set_ylabel("Metric", size=label_size)
ax.set_title(r"Spearman's $\rho$ Correlation", size=18)
fig.savefig("table1_spearman.pdf", bbox_inches="tight", format="pdf")



In [None]:
# Metric-by-language grid of Kendall coefficients, rounded for display and
# restricted to (and ordered by) the columns listed in figure1_columns.
table1_kt = (
    tbl1_df
    .pivot(index="Metric", columns="Language", values="kendall_corr")
    .round(2)
    .reset_index()
    .sort_values("Metric")
    [figure1_columns]
)

label_size = 16
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(
    table1_kt.set_index("Metric"),
    ax=ax,
    cmap="YlGnBu",
    annot=True,
    fmt="0.2f",
    annot_kws={"fontsize": label_size},
)

# Enlarge the colorbar and axis tick labels to match the cell annotations.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=label_size)
plt.setp(ax.get_xticklabels(), size=label_size)
plt.setp(ax.get_yticklabels(), size=label_size)

ax.set_xlabel("Language", size=label_size)
# The y-axis label is whitespace only, i.e. visually blank.
ax.set_ylabel("  ", size=label_size)
ax.set_title(r"Kendall's $\tau$ Correlation", size=18)
fig.savefig("table1_kendall.pdf", bbox_inches="tight", format="pdf")