In [None]:
import json

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# Apply seaborn's default theme to all matplotlib figures; color_codes=True remaps
# matplotlib's shorthand color codes (e.g. "b", "g", "r") to the seaborn palette.
sns.set(color_codes=True)

This script expects a JSON file of bootstrapped translation samples, structured as outlined below. At each nesting level (outermost first), it expects the following keys:
1. `['kendall', 'spearman']`
2. `['coherence', 'consistency', 'fluency', 'relevance']`
3. `['blanc', 'estime', 'jshannon', 'berts', 'bleu', 'rouge1', 'rouge2', 'rougeLsum', 'rougeL', 'rouge3']`
4. `['en^en', 'en^de', 'de^en', 'en^es', 'es^en', 'en^fr', 'fr^en', 'en^it', 'it^en', 'en^af', 'af^en', 'en^hi', 'hi^en', 'en^ru', 'ru^en']`

In [None]:
# Sample data for one key at each level of the json.
# Nesting (outermost first): correlation type -> quality -> measure ->
# language pair -> list of bootstrapped values.
# The innermost value is a plain list: wrapping it in an extra pair of braces
# would make it a set literal holding a list, which raises
# "TypeError: unhashable type: 'list'" when the cell runs.
sample_corr_bs = {
    "kendall": {
        "coherence": {
            "bleu": {
                "en^es": [
                    0.20872789089801919,
                    0.19846267533121098,
                    0.157583759578093,
                    0.21771535178618073,
                    0.20612461297547746,
                ]
            }
        }
    }
}
sample_corr_bs

In [None]:
# ADD BOOTSTRAPPED FILE PATH HERE
# Path of the JSON file that is opened and parsed with json.load further down.
# "XXXX.json" is a placeholder and must be replaced before running the notebook.
fname = "XXXX.json"

In [None]:
# Language-pair keys of the innermost JSON level: the untranslated "en^en"
# baseline, then an "en^xx" / "xx^en" pair for every other language.
langs = ["en^en"] + [
    pair
    for code in ("de", "es", "fr", "it", "af", "hi", "ru")
    for pair in ("en^" + code, code + "^en")
]

# Measures that take part in the pairwise comparisons.
measures_use = [
    "blanc", "estime", "jshannon", "berts", "bleu",
    "rougeLsum", "rouge1", "rouge2", "rouge3",
]  # fmt: skip

qualities = ["coherence", "consistency", "fluency", "relevance"]
correlation_types = ["kendall", "spearman"]

# One row per language: code, display name, plot color. The rows are unzipped
# into three parallel lists; English comes first so that `[1:]` selects the
# non-English languages.
languages, language_names, lang_colors = map(
    list,
    zip(
        ("en", "English", "red"),
        ("af", "Afrikaans", "pink"),
        ("de", "German", "orange"),
        ("es", "Spanish", "yellow"),
        ("fr", "French", "green"),
        ("hi", "Hindi", "blue"),
        ("it", "Italian", "cyan"),
        ("ru", "Russian", "lime"),
    ),
)

In [None]:
# Load the nested dictionary of bootstrapped values from the JSON file `fname`.
with open(fname, "r") as f:
    corr_bs = json.load(f)

# Look up the keys on each of the four nesting levels (correlation type ->
# quality -> measure -> language pair). Raises KeyError if "spearman",
# "relevance" or "blanc" is missing from the file.
# NOTE(review): a notebook only echoes the last expression of a cell, so this
# tuple is evaluated but not displayed while more code follows it in the same
# cell — confirm whether it is still meant to be shown.
corr_bs.keys(), corr_bs["spearman"].keys(), corr_bs["spearman"][
    "relevance"
].keys(), corr_bs["spearman"]["relevance"]["blanc"].keys()


# Change the signs: every sample of the "estime" and "jshannon" measures is
# negated in place; all other measures are left untouched.
# NOTE(review): presumably lower values of these two measures mean better
# summaries, so negating makes "larger is better" hold everywhere — confirm.
for per_quality in corr_bs.values():
    for per_measure in per_quality.values():
        for measure_name, per_lang in per_measure.items():
            if measure_name not in ("estime", "jshannon"):
                continue
            for lang_pair, samples in per_lang.items():
                per_lang[lang_pair] = [-value for value in samples]


def get_fraction_better(data_1, data_2):
    """Return the fraction of paired samples in which data_1 beats data_2.

    The two sequences are paired position by position (pairing stops at the
    shorter one). A pair counts only when the value from ``data_1`` is strictly
    greater, so ties do not count. The count is divided by ``len(data_1)``.

    Raises:
        ZeroDivisionError: if ``data_1`` is empty.
    """
    wins = sum(1 for first, second in zip(data_1, data_2) if first > second)
    return wins / len(data_1)


# Pairwise comparison of measures.
# For each (correlation type, quality, non-English language) case, every
# unordered pair of measures is compared three times with get_fraction_better:
# on the original "en^en" data, on the translated "en^xx" data and on the
# round-trip "xx^en" data. Each case stores (pair names, list of 3-tuples).
data_pairwise = {}
for correlation_type in correlation_types:
    for quality in qualities:
        for language in languages[1:]:  # index 0 is "en", which has no translation
            # original, translated, roundtrip
            directions = ("en^en", "en^" + language, language + "^en")
            names_pairs, data_comparisons = [], []
            for measure_1 in measures_use:
                for measure_2 in measures_use:
                    # Keep each unordered pair once (string order decides which one).
                    if not measure_2 > measure_1:
                        continue
                    names_pairs.append("^".join((measure_1, measure_2)))
                    samples = corr_bs[correlation_type][quality]
                    data_comparisons.append(
                        tuple(
                            get_fraction_better(
                                samples[measure_1][direction],
                                samples[measure_2][direction],
                            )
                            for direction in directions
                        )
                    )
            name_case = "^".join((correlation_type, quality, language))
            data_pairwise[name_case] = (names_pairs, data_comparisons)

# Size check: number of cases, and number of pair names / comparisons for one
# case. Evaluated but not echoed unless it is the last expression of the cell.
(
    len(data_pairwise),
    len(data_pairwise["kendall" + "^" + "coherence" + "^" + "fr"][0]),
    len(data_pairwise["kendall" + "^" + "coherence" + "^" + "fr"][1]),
)


def get_survival_data(data_pairwise, p_low=0.025, p_high=0.975, n_round=3):
    """Summarise, per case, how much reliable comparisons shift after translation.

    Args:
        data_pairwise: mapping ``case name -> (pair names, comparisons)``; only
            element ``[1]`` is read. Each comparison is indexable as
            ``(original, translated, roundtrip)`` fractions.
        p_low, p_high: a comparison is counted as reliable when its original
            fraction is strictly below ``p_low`` or strictly above ``p_high``.
        n_round: number of decimals the three results are rounded to.

    Returns:
        dict ``case name -> (f_original, avg_diff_tr, avg_diff_rt)`` where
        ``f_original`` is the share of reliable comparisons, and the other two
        are the summed absolute differences (translated / roundtrip versus
        original) of the reliable comparisons, divided by the number of ALL
        comparisons of the case.

    Raises:
        ZeroDivisionError: if a case has no comparisons.

    NOTE(review): the sums cover only the reliable comparisons while the
    divisor is the total count — confirm this normalisation is intended.
    """
    survival_data = {}
    for name_case, case_entry in data_pairwise.items():
        comparisons = case_entry[1]
        n_total = len(comparisons)
        reliable = [
            fractions
            for fractions in comparisons
            if fractions[0] < p_low or fractions[0] > p_high
        ]
        shift_tr = sum(abs(fractions[1] - fractions[0]) for fractions in reliable)
        shift_rt = sum(abs(fractions[2] - fractions[0]) for fractions in reliable)
        survival_data[name_case] = (
            round(len(reliable) / n_total, n_round),
            round(shift_tr / n_total, n_round),
            round(shift_rt / n_total, n_round),
        )
    return survival_data


# Per-case summary of the pairwise comparisons.
survival_data = get_survival_data(data_pairwise)

# Plot color of every non-English language (index 0, English, is skipped).
map_lang_color = dict(zip(languages[1:], lang_colors[1:]))

# Matplotlib marker used for each quality.
map_quality_shape = {"consistency": "s", "relevance": "^", "fluency": "o", "coherence": "P"}

# Language code -> display name.
lang_name_dict = dict(zip(languages, language_names))


# Collect the scatter-plot inputs for the "kendall" cases: one point per
# (quality, language) with x = avg_diff_tr and y = avg_diff_rt, plus the
# color (by language) and marker (by quality) of that point.
blob_size = 100
alp = 0.7
x_data, y_data = [], []
qual_data, lang_data = [], []
colors, shapes = [], []
for name_case, survival_case in survival_data.items():
    # Case names have the form "<correlation>^<quality>^<language>".
    names_case = name_case.split("^")
    if names_case[0] != "kendall":
        # Skip instead of stopping: the selection must not depend on the
        # "kendall" cases happening to come first in the dictionary.
        continue
    qual_data.append(names_case[1])
    lang_data.append(names_case[2])
    color = map_lang_color[names_case[2]]
    colors.append(color)
    shape = map_quality_shape[names_case[1]]
    shapes.append(shape)
    x_data.append(survival_case[1])
    y_data.append(survival_case[2])

# Plot: shift of the reliable comparisons under translation (x) against the
# shift under round-trip translation (y), on log-log axes. Color encodes the
# language and marker shape encodes the quality.
size_x = 6
size_y = 6
marksize = 15


def _legend_marker(label, marker, color):
    """Return a line-less Line2D handle that shows a single legend marker."""
    return Line2D(
        [0],
        [0],
        marker=marker,
        color=color,
        label=label,
        markerfacecolor=color,
        markersize=marksize,
        linestyle="None",
    )


fig, ax = plt.subplots(figsize=(size_x, size_y))

# Iterate over the collected points themselves so the loop can neither run past
# the data nor silently skip points when the set of cases changes.
for x_val, y_val, point_color, point_marker in zip(x_data, y_data, colors, shapes):
    ax.scatter(
        x=x_val,
        y=y_val,
        color=point_color,
        marker=point_marker,
        s=blob_size,
        alpha=alp,
    )

# Legend handles are built once, outside the scatter loop. The quality handles
# are derived from map_quality_shape so the legend always matches the markers
# that were actually plotted.
legend_qualities = [
    _legend_marker(quality_name, quality_marker, "grey")
    for quality_name, quality_marker in map_quality_shape.items()
]
legend_languages = [
    _legend_marker(lang_name_dict.get(lang_code), "o", lang_color)
    for lang_code, lang_color in zip(languages[1:], lang_colors[1:])
]

legend1 = ax.legend(
    handles=legend_qualities,
    loc="upper center",
    bbox_to_anchor=(0.15, 1.37),
    title="Qualities",
    fontsize=14,
)
legend2 = ax.legend(
    handles=legend_languages,
    loc="upper center",
    bbox_to_anchor=(0.7, 1.37),
    title="Languages",
    fontsize=14,
    ncol=2,
)
# The second legend() call replaces the first legend on the axes, so the first
# one has to be re-added as a plain artist.
ax.add_artist(legend1)

ax.set_xlabel("Translated", fontsize=15)
ax.set_ylabel("Roundtrip translated", fontsize=15)
ax.set_yscale("log")
ax.set_xscale("log")
ax.set_xlim(5e-4, 0.35)
ax.set_ylim(5e-4, 0.35)
# y = x reference line. It is anchored on two strictly positive points because
# 0 has no position on a logarithmic axis.
ax.axline((1e-3, 1e-3), (1, 1), linestyle="dotted", color="black", alpha=0.5)
ax.set_aspect("equal", adjustable="box")

# No extension is given, so matplotlib appends its default output format.
fig.savefig("roundtrip_log", bbox_inches="tight")