In [None]:
import json

import numpy as np
import pandas as pd
from scipy.stats import kendalltau

from utils import read_data_into_dataframe, qualities

In [None]:
# Human expert scores from SummEval; indexed further down as
# scores_human[quality][expert][sample].
# The encoding is given explicitly so that reading does not depend on the
# platform's locale default (the files hold translated, non-ASCII text).
with open("data/scores_human_from_SummEval.json", encoding="utf-8") as f:
    scores_human = json.load(f)

# JSON Lines files: one JSON object per line. Whitespace-only lines (e.g. a
# trailing blank line) are skipped instead of crashing json.loads.
with open("data/translations_summaries.jsonl", encoding="utf-8") as f:
    translations_summaries = [json.loads(line) for line in f if line.strip()]

with open("data/translations_texts_refs.jsonl", encoding="utf-8") as f:
    translations_texts_refs = [json.loads(line) for line in f if line.strip()]

In [None]:
def get_samples(translations_summaries, translations_texts_refs, scores_human,
                summaries_per_text=17, n_experts=3):
    """
    Build one sample per summary, pairing it with its source text, its
    reference summaries and the human scores.

    Summaries are expected to be grouped by source text: the first
    `summaries_per_text` entries of `translations_summaries` belong to
    `translations_texts_refs[0]`, the next group to `translations_texts_refs[1]`,
    and so on.

    Args:
        translations_summaries: List of dicts; `entry['texts'][0]` is used as the summary.
        translations_texts_refs: List of dicts with an 'id' and a 'texts' list whose
            first element is the source text and whose remaining elements are the
            reference summaries.
        scores_human: Human scores, forwarded to `expert_annotations`, which indexes
            them as [quality][expert][sample].
        summaries_per_text: Number of consecutive summaries sharing one source text.
        n_experts: Number of expert annotators to collect scores for.

    Returns:
        samples: List of samples (1700 for the SummEval data), each sample is a dictionary.
            It has the id, the text and the summary, the reference summaries,
            and one dictionary of 4 human scores per expert (3 experts from SummEval) -
            for the four qualities of the summary.

    Raises:
        ValueError: If `summaries_per_text` is smaller than 1.
    """
    if summaries_per_text < 1:
        raise ValueError("summaries_per_text must be a positive integer")

    samples = []
    for i, translations_summary in enumerate(translations_summaries):
        i_text, i_ref = divmod(i, summaries_per_text)
        if i_ref == 0:
            # First summary of a new group: switch to the next source text.
            # The values are reused for the remaining summaries of the group.
            translations_text_refs = translations_texts_refs[i_text]
            id_text = translations_text_refs['id']
            text = translations_text_refs['texts'][0]
            references = translations_text_refs['texts'][1:]
        samples.append({
            'id': id_text,
            'decoded': translations_summary['texts'][0],
            'references': references,
            'text': text,
            'expert_annotations': [
                expert_annotations(scores_human, i, id_expert)
                for id_expert in range(n_experts)
            ],
        })
    return samples


def expert_annotations(scores_human, id_sample, id_expert):
    """
    Collect the four quality scores one expert gave to one sample.

    Args:
        scores_human: Nested sequence indexed as [quality][expert][sample], where the
            quality axis is ordered coherence, consistency, fluency, relevance.
        id_sample: Index of the sample along the last axis.
        id_expert: Index of the expert along the middle axis.

    Returns:
        Dictionary mapping each quality name to the corresponding score.
    """
    # Position in this tuple is the index along the first axis of scores_human.
    quality_names = ('coherence', 'consistency', 'fluency', 'relevance')
    return {
        name: scores_human[axis][id_expert][id_sample]
        for axis, name in enumerate(quality_names)
    }


def create_nested(df, primary, secondary, values):
    """
    Convert a long-format frame into a two-level dictionary.

    Args:
        df: DataFrame containing the columns named by the other arguments.
        primary: Column whose distinct values become the outer keys.
        secondary: Column whose values become the inner keys within each group.
        values: Column supplying the inner values.

    Returns:
        Dictionary of the form {primary_value: {secondary_value: value}}.
    """
    nested = {}
    for group_key, group_frame in df.groupby(primary):
        inner_series = group_frame.set_index(secondary)[values]
        nested[group_key] = inner_series.to_dict()
    return nested

In [None]:
# Assemble the per-summary samples (source text, references, expert scores);
# the following cells read the expert annotations from this list.
expert = get_samples(
    translations_summaries=translations_summaries,
    translations_texts_refs=translations_texts_refs,
    scores_human=scores_human,
)

In [None]:
# Automatic metrics to analyse. Submetrics that are not listed here are
# skipped when the correlations with the expert scores are computed.
measures = [
    "bertscores_F",
    "rougeL", "rouge1", "rouge2",
    "bleu",
    "jshannon",
    "blanc",
    "estime",
]

In [None]:
# One row per (sample, expert) pair holding that expert's four quality scores.
data = [
    [
        i,
        idx,
        evaluator["coherence"],
        evaluator["consistency"],
        evaluator["fluency"],
        evaluator["relevance"],
    ]
    for i, sample in enumerate(expert)
    for idx, evaluator in enumerate(sample["expert_annotations"])
]

# Column names are given at construction time, so an empty `data` list yields an
# empty but correctly labelled frame instead of a length-mismatch error.
df = pd.DataFrame(
    data,
    columns=[
        "sample_id",
        "expert_id",
        "coherence",
        "consistency",
        "fluency",
        "relevance",
    ],
)

# Automatic metric scores; only the rows for the English data are kept.
scores = read_data_into_dataframe()
english = scores.loc[scores.language == "en"].reset_index(drop=True)

In [None]:
# Kendall's tau between every selected automatic metric and every human-rated
# quality, computed separately for each expert.

# The metric filter does not depend on the quality or the expert, so it is
# applied once up front. Metrics keep the order in which they appear in `english`.
selected_metrics = [
    metric
    for metric in english.submetric.drop_duplicates().tolist()
    if metric in measures and metric not in qualities
]
# Slice the metric scores once per metric instead of once per (expert, quality).
metric_frames = {
    metric: english.loc[english.submetric == metric] for metric in selected_metrics
}

correlations = []
# Expert ids come from the annotations themselves rather than a hard-coded count.
for ex_id in sorted(int(e) for e in df.expert_id.unique()):
    for metric in selected_metrics:
        met_df = metric_frames[metric]
        for quality in qualities:
            # This expert's scores for one quality, indexed by sample id.
            exp = df.loc[(df.expert_id == ex_id), ['sample_id', quality]].set_index('sample_id')

            # Align metric values with the expert scores via the sample index.
            merged = met_df.merge(exp, left_on='data_idx', right_index=True)
            correlation = kendalltau(merged.value, merged[quality]).correlation

            correlations.append(dict(
                metric=metric,
                quality=quality,
                expert_id=ex_id,
                correlation=correlation
            ))


margin = pd.DataFrame(correlations)

# Mean and spread of the correlation across experts.
# "std" is passed as a string rather than np.std: pandas has been resolving
# np.std inside agg to its own sample standard deviation (ddof=1) and has
# deprecated that aliasing. The string pins the result and the "std" column name.
merged_stats = (
    margin
    .groupby(["metric", "quality"])["correlation"]
    .agg(["mean", "std"])
    .reset_index()
)

# Largest disagreement between experts: max minus min correlation.
margin_max_min = (
    margin.groupby(["metric", "quality"])["correlation"]
    .agg(["max", "min"])
    .reset_index()
    .assign(difference=lambda x: x["max"] - x["min"])
)

In [None]:
# Flat lookups keyed by (metric, quality) tuples, kept for interactive inspection.
diff_dict = margin_max_min.set_index(["metric", "quality"])["difference"].to_dict()
stat_dict = merged_stats.set_index(["metric", "quality"])["std"].to_dict()

# Persist both statistics as nested {metric: {quality: value}} mappings.
with open("data/margins.json", "w") as out_file:
    out_file.write(
        json.dumps(
            {
                "max_margin": create_nested(margin_max_min, "metric", "quality", "difference"),
                "std": create_nested(merged_stats, "metric", "quality", "std"),
            }
        )
    )