In [None]:
import string
from collections import Counter

import numpy as np
import pandas as pd
import torch
from tokenize_uk import tokenize_words
from transformers import AutoTokenizer, AutoModelForMaskedLM
from tree_stem import stem_word

# Punctuation characters; tokens equal to one of these are filtered out of
# every token sequence before scoring.
stop = [symbol for symbol in string.punctuation]

## Contents:
* [Fragment-based scores](#first-bullet)
* [BERTScore](#second-bullet)
* [ROUGE scores](#third-bullet)

In [None]:
def calculate_fragments(summary_tokens=None, article_tokens=None):
    """Greedily collect the token spans of a summary that are copied from its article.

    For every summary position the longest run of consecutive tokens that also
    occurs consecutively in the article is located; when such a run exists it is
    recorded and the scan resumes right after it, otherwise the scan advances by
    one token.

    Args:
        summary_tokens: Sequence of summary tokens. When omitted, the
            notebook-level ``seq_1`` is used (the historical no-argument call).
        article_tokens: Sequence of article tokens. When omitted, the
            notebook-level ``seq_2`` is used.

    Returns:
        List of ``(start, end)`` tuples, half-open index ranges into the
        summary tokens, in order of appearance. Empty when nothing is shared
        or when either sequence is empty.
    """
    # Fall back to the notebook globals so existing `calculate_fragments()`
    # calls keep working unchanged.
    if summary_tokens is None:
        summary_tokens = seq_1
    if article_tokens is None:
        article_tokens = seq_2

    len_summary = len(summary_tokens)
    len_article = len(article_tokens)

    subsequences = []

    i = 0
    while i < len_summary:
        # Longest match found so far that starts at summary position i.
        res = (0, 0)

        j = 0
        while j < len_article:
            if summary_tokens[i] == article_tokens[j]:
                # Extend the match for as long as both sequences agree.
                i_i = i
                j_j = j
                while (i_i < len_summary and j_j < len_article
                       and summary_tokens[i_i] == article_tokens[j_j]):
                    j_j += 1
                    i_i += 1
                if i_i - i > res[1] - res[0]:
                    res = (i, i_i)
                # Continue scanning the article after the matched run.
                j = j_j
            else:
                j += 1

        cur_length = res[1] - res[0]
        # Always move forward by at least one token, even without a match.
        i += max(cur_length, 1)
        if cur_length:
            subsequences.append(res)

    return subsequences

In [None]:
def calculate_coverage(fragment_spans=None, summary_length=None):
    """Return the share of summary tokens that lie inside extractive fragments.

    Args:
        fragment_spans: Iterable of ``(start, end)`` half-open spans. When
            omitted, the notebook-level ``fragments`` is used (the historical
            no-argument call).
        summary_length: Number of tokens in the summary. When omitted,
            ``len(summary_tokens)`` of the notebook-level variable is used.

    Returns:
        Sum of span lengths divided by ``summary_length``; NaN when the
        summary has no tokens (previously a ``ZeroDivisionError``).
    """
    # Fall back to the notebook globals so `calculate_coverage()` still works.
    if fragment_spans is None:
        fragment_spans = fragments
    if summary_length is None:
        summary_length = len(summary_tokens)

    # The ratio is undefined for an empty summary; NaN matches the marker the
    # notebook already uses for scores that could not be computed.
    if summary_length == 0:
        return float("nan")

    extractive_fragment_length = sum(s[1] - s[0] for s in fragment_spans)
    return extractive_fragment_length / summary_length

In [None]:
def calculate_density(fragment_spans=None, summary_length=None):
    """Return the sum of squared fragment lengths divided by the summary length.

    Args:
        fragment_spans: Iterable of ``(start, end)`` half-open spans. When
            omitted, the notebook-level ``fragments`` is used (the historical
            no-argument call).
        summary_length: Number of tokens in the summary. When omitted,
            ``len(summary_tokens)`` of the notebook-level variable is used.

    Returns:
        Sum of squared span lengths divided by ``summary_length``; NaN when
        the summary has no tokens (previously a ``ZeroDivisionError``).
    """
    # Fall back to the notebook globals so `calculate_density()` still works.
    if fragment_spans is None:
        fragment_spans = fragments
    if summary_length is None:
        summary_length = len(summary_tokens)

    # Undefined for an empty summary; report NaN instead of crashing.
    if summary_length == 0:
        return float("nan")

    # Squaring rewards long copied runs over many short ones.
    squared_fragment_length = sum((s[1] - s[0]) ** 2 for s in fragment_spans)
    return squared_fragment_length / summary_length

## Fragment-based scores <a class="anchor" id="first-bullet"></a>

In [None]:
# Accumulator for the per-post metric tuples of every experiment version.
metrics = list()

In [None]:
# Experiment runs to compare; each name is later used to build the path
# results/<name>.csv.
versions = [
    f"experiment{suffix}"
    for suffix in ("0", "1", "2", "2_2", "3", "4", "5")
]

In [None]:
# Start from an empty list so that re-running this cell does not append every
# row a second time.
metrics = []

for version in versions:
    DF_NAME = f"results/{version}.csv"
    VERSION = version

    df = pd.read_csv(DF_NAME, index_col=0)

    for ind, row in df.iterrows():

        # Column "output" holds the generated summary, column "2" the article
        # text and column "1" the post id.
        summary = row["output"]
        article = row["2"]

        # calculate_fragments / calculate_coverage / calculate_density are
        # called without arguments and read seq_1, seq_2, summary_tokens,
        # len_summary, len_article and fragments from the notebook namespace,
        # so these names must be (re)assigned for every row.
        seq_1 = tokenize_words(summary)
        seq_1 = [token for token in seq_1 if token not in stop]
        summary_tokens = seq_1

        seq_2 = tokenize_words(article)
        seq_2 = [token for token in seq_2 if token not in stop]
        article_tokens = seq_2

        len_summary = len(seq_1)
        len_article = len(seq_2)

        if len_summary == 0:
            # Every ratio below divides by the summary length; a summary with
            # no tokens (empty or punctuation only) gets NaN scores instead of
            # aborting the whole run with ZeroDivisionError.
            metrics.append((row["1"], np.nan, np.nan, np.nan, np.nan,
                            len_summary, len_article, VERSION))
            continue

        fragments = calculate_fragments()
        compression = len_article / len_summary

        coverage = calculate_coverage()
        abstractivity = 1 - coverage

        density = calculate_density()

        metrics.append((row["1"], round(compression, 4), round(coverage, 4),
                        round(abstractivity, 4), round(density, 4),
                        len_summary, len_article, VERSION))

In [None]:
# One row per (version, post); the column order mirrors the tuples appended
# to `metrics`.
metrics_df = pd.DataFrame(
    data=metrics,
    columns=[
        "post_id",
        "compression",
        "coverage",
        "abstractivity",
        "density",
        "tokens_summary",
        "tokens_article",
        "version",
    ],
)

## BERTScore <a class="anchor" id="second-bullet"></a>

In [None]:
# Reference data re-indexed by post id, and a plain post_id -> summary lookup
# built from its "summary" column.
dd = pd.read_csv("test_data.csv", index_col=0).set_index("post_id")
d = dd["summary"].to_dict()

In [None]:
# Tokenizer and masked-LM model loaded from the same checkpoint identifier.
# calculate_score uses both as notebook-level globals and reads the model's
# hidden states (not its masked-LM predictions).
tokenizer = AutoTokenizer.from_pretrained("youscan/ukr-roberta-base")
model = AutoModelForMaskedLM.from_pretrained("youscan/ukr-roberta-base")

In [None]:
def calculate_score(txt, summary):
    """Return the cosine similarity between mean-pooled embeddings of two texts.

    Both texts are lower-cased, encoded with the notebook-level ``tokenizer``
    (with truncation) and passed through the notebook-level ``model``; the
    last hidden-state layer is averaged over the token dimension to obtain one
    vector per text.

    Note: although the notebook section is titled "BERTScore", this is a
    single sentence-level cosine similarity, not a token-by-token matching.

    Args:
        txt: First text (the generated output, as called in this notebook).
        summary: Second text (the reference summary, as called in this notebook).

    Returns:
        The similarity rounded to 4 decimal places.

    Raises:
        AttributeError: If either argument has no ``lower`` method (e.g. a NaN
            read from a CSV file).
    """
    text1 = txt.lower()
    text2 = summary.lower()

    def _mean_embedding(text):
        # One vector per text: average of the last hidden layer over tokens.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            outputs = model(**inputs, output_hidden_states=True)
        return outputs.hidden_states[-1].mean(dim=1).numpy()

    embeddings1 = _mean_embedding(text1)
    embeddings2 = _mean_embedding(text2)

    # Each embedding has a single row, so the matrix norm equals the vector norm.
    similarity = np.dot(embeddings1, embeddings2.T) / (
        np.linalg.norm(embeddings1) * np.linalg.norm(embeddings2)
    )

    return round(similarity[0][0], 4)

In [None]:
for version in versions:
    DF_NAME = f"results/{version}.csv"
    VERSION = version

    # Load this version's outputs. Without this, `df` would still be whichever
    # frame was read last, and every version would receive the same scores.
    df = pd.read_csv(DF_NAME, index_col=0)

    for ind, row in df.iterrows():

        txt = row["output"]
        post_id = row["1"]
        summary = d[post_id]
        try:
            bert = calculate_score(txt, summary)
        except Exception:
            # Best effort: a row that cannot be scored (e.g. a missing output)
            # is recorded as NaN. Unlike a bare `except:`, this does not
            # swallow KeyboardInterrupt.
            bert = np.nan

        row_mask = (metrics_df["version"] == VERSION) & (metrics_df["post_id"] == post_id)
        metrics_df.loc[row_mask, "bertscore"] = bert

## ROUGE scores <a class="anchor" id="third-bullet"></a>

In [1]:
def rouge_1(s1, s2):
    """Compute unigram-overlap precision, recall and F1 between two token lists.

    Overlap is clipped: a token counts at most as many times as it occurs in
    *both* sequences, so neither precision nor recall can exceed 1.

    Args:
        s1: Tokens of the first sequence; recall is measured against its length
            (the reference, as called from ``calculate_scores``).
        s2: Tokens of the second sequence; precision is measured against its
            length (the candidate, as called from ``calculate_scores``).

    Returns:
        Tuple ``(precision, recall, f1)``; ``(0, 0, 0)`` when either sequence
        is empty.
    """
    tokens1 = len(s1)
    tokens2 = len(s2)

    if tokens1 == 0 or tokens2 == 0:
        return (0, 0, 0)

    # Multiset intersection keeps min(count in s1, count in s2) per token.
    matches = sum((Counter(s1) & Counter(s2)).values())

    recall = matches / tokens1
    precision = matches / tokens2

    if precision + recall == 0:
        return (precision, recall, 0)

    f1 = 2 * (precision * recall) / (precision + recall)

    return (precision, recall, f1)


def rouge_l(s1, s2):
    """Compute precision, recall and F1 from the longest common subsequence.

    Args:
        s1: First token sequence; recall is the LCS length over its length.
        s2: Second token sequence; precision is the LCS length over its length.

    Returns:
        Tuple ``(precision, recall, f1)``; ``(0, 0, 0)`` when either sequence
        is empty.
    """
    size_1, size_2 = len(s1), len(s2)

    # Nothing to compare when one side has no tokens.
    if not (size_1 and size_2):
        return (0, 0, 0)

    common = lcs_length(s1, s2)
    recall = common / size_1
    precision = common / size_2

    # No shared subsequence: report F1 as 0 instead of dividing by zero.
    if precision + recall == 0:
        return (precision, recall, 0)

    return (precision, recall, 2 * (precision * recall) / (precision + recall))


def lcs_length(a, b):
    """Return the length of the longest common subsequence of ``a`` and ``b``.

    Uses a full dynamic-programming grid in which cell ``[r][c]`` holds the LCS
    length of the first ``r`` items of ``a`` and the first ``c`` items of ``b``.
    """
    grid = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]
    for r, item_a in enumerate(a, 1):
        current, previous = grid[r], grid[r - 1]
        for c, item_b in enumerate(b, 1):
            if item_a == item_b:
                # Extend the subsequence that ended just before both items.
                current[c] = previous[c - 1] + 1
            else:
                # Carry over the better of "skip item of b" / "skip item of a".
                current[c] = max(current[c - 1], previous[c])
    return grid[-1][-1]

def rouge_2(s1, s2):
    """Compute bigram-overlap precision, recall and F1 between two token lists.

    Overlap is clipped: a bigram counts at most as many times as it occurs in
    *both* sequences, so neither precision nor recall can exceed 1.

    Args:
        s1: Tokens of the first sequence; recall is measured against its number
            of bigrams (the reference, as called from ``calculate_scores``).
        s2: Tokens of the second sequence; precision is measured against its
            number of bigrams (the candidate, as called from ``calculate_scores``).

    Returns:
        Tuple ``(precision, recall, f1)``; ``(0, 0, 0)`` when either sequence
        has fewer than two tokens.
    """
    bigrams1 = list(zip(s1, s1[1:]))
    bigrams2 = list(zip(s2, s2[1:]))

    tokens1 = len(bigrams1)
    tokens2 = len(bigrams2)

    if tokens1 == 0 or tokens2 == 0:
        return (0, 0, 0)

    # Multiset intersection keeps min(count in s1, count in s2) per bigram.
    matches = sum((Counter(bigrams1) & Counter(bigrams2)).values())

    recall = matches / tokens1
    precision = matches / tokens2

    if precision + recall == 0:
        return (precision, recall, 0)

    f1 = 2 * (precision * recall) / (precision + recall)
    return (precision, recall, f1)

In [None]:
def calculate_scores(reference, candidate):
    """Tokenize, filter and stem two texts, then score them with ROUGE-1/2/L.

    Args:
        reference: Text passed as the first sequence to the ROUGE helpers.
        candidate: Text passed as the second sequence to the ROUGE helpers.

    Returns:
        Tuple ``(rouge_1, rouge_2, rouge_l)`` where each element is the
        ``(precision, recall, f1)`` tuple returned by the matching helper.
    """
    reference_words = tokenize_words(reference)
    candidate_words = tokenize_words(candidate)

    # Drop punctuation tokens, then reduce the remaining words to their stems.
    reference_stems = [stem_word(word) for word in reference_words if word not in stop]
    candidate_stems = [stem_word(word) for word in candidate_words if word not in stop]

    unigram_scores = rouge_1(reference_stems, candidate_stems)
    bigram_scores = rouge_2(reference_stems, candidate_stems)
    lcs_scores = rouge_l(reference_stems, candidate_stems)

    return (unigram_scores, bigram_scores, lcs_scores)

In [None]:
for version in versions:
    DF_NAME = f"results/{version}.csv"
    VERSION = version

    # Load this version's outputs. Without this, `df` would still be whichever
    # frame was read last, and every version would receive the same scores.
    df = pd.read_csv(DF_NAME, index_col=0)

    for ind, row in df.iterrows():

        txt = row["output"]
        post_id = row["1"]
        summary = d[post_id]
        try:
            r = calculate_scores(summary, txt)

            # Keep only the F1 component (index 2) of each score tuple.
            r1 = r[0][2]
            r2 = r[1][2]
            rl = r[2][2]
        except Exception:
            # Best effort: rows that cannot be scored are recorded as NaN.
            # `r1, r2, rl = np.nan` would itself raise TypeError (a float
            # cannot be unpacked), so assign the three names explicitly.
            r1 = r2 = rl = np.nan

        row_mask = (metrics_df["version"] == VERSION) & (metrics_df["post_id"] == post_id)
        metrics_df.loc[row_mask, "r1"] = r1
        metrics_df.loc[row_mask, "r2"] = r2
        metrics_df.loc[row_mask, "rl"] = rl

In [None]:
# Write the combined per-post metrics of all versions to a CSV file in the
# current working directory (the DataFrame index is written as well).
metrics_df.to_csv("experiments_comparison.csv")