In [58]:
import pandas as pd
import numpy as np

import re

from tqdm import tqdm
tqdm.pandas()

from rouge_score import rouge_scorer
from bert_score import score
from nltk.translate import bleu_score

import statistics

import itertools

# Data Loading 

In [6]:
DATA_SOURCE = '../data/data_clean/polisumm_final.csv'

In [7]:
data = pd.read_csv(DATA_SOURCE)

In [22]:
data = data[~data['left_sum'].isna()]

# Testing Maximum Extractive Score 

In [26]:
sub_data = data[['left_sum', 'right_sum', 'all_texts']]

In [27]:
sub_data['full_sum'] = sub_data['left_sum'] + '|' + sub_data['right_sum']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [28]:
sub_data.dropna()

Unnamed: 0,left_sum,right_sum,all_texts,full_sum
0,The left supports cash payments and argues tha...,The right is generally supportive of helping c...,Mnuchin: Family of 4 could get $3K under virus...,The left supports cash payments and argues tha...
1,The left is supportive of both policies.,The right is critical of both policies.,Analysis | What Trump's Trillion-Dollar Bailou...,The left is supportive of both policies.|The r...
2,"The left is skeptical of Zuckerberg’s motives,...",The right is disturbed by the free speech impl...,5 major Facebook stories broke over the last f...,"The left is skeptical of Zuckerberg’s motives,..."
3,The left is optimistic about Biden’s chances.,The right is cautiously optimistic about Trump...,Biden maintains a 90% chance of winning.\n\nht...,The left is optimistic about Biden’s chances.|...
4,The left is optimistic about Biden’s chances.,The right is cautiously optimistic about Trump...,"Biden has a 90% chance of winning, according t...",The left is optimistic about Biden’s chances.|...
...,...,...,...,...
1186,The left worries that the case will be used to...,"The right generally sympathizes with Depp, and...",Jurors hearing testimony in #JohnnyDepp's libe...,The left worries that the case will be used to...
1189,The left is generally supportive of the decisi...,The right is generally supportive of the decis...,The best thing the United States and broader i...,The left is generally supportive of the decisi...
1190,The left generally supports the editor’s firin...,The right opposes the editor’s firing and the ...,Democratic Governors Will Now Lead a Majority ...,The left generally supports the editor’s firin...
1191,The left highlights the structural impediments...,The right is critical of the Biden administrat...,"Apparently, Trump was bad for foreign relation...",The left highlights the structural impediments...


# Evaluation 

In [67]:
punct_regex = '\s*[\.\?\!\|\n\:]\s*[\.\?\!\|\n\:]*\s*'
url_regex = r'https?:\/\/\S*'


### ROUGE Scores 

In [40]:
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2'], use_stemmer = True)
def get_max_rouge(row):
    left_sum = row['left_sum']
    right_sum = row['right_sum']
    
    text_l = re.split(punct_regex, row['all_texts'])
    
    lscores, rscores = [], []
    for text in text_l:
        lscores.append(scorer.score(left_sum, text))
        rscores.append(scorer.score(right_sum, text))
        
    max_l = get_max_scores(lscores)
    max_r = get_max_scores(rscores)
    
    return max_l, max_r
        
def get_max_scores(scores):
    scores.sort(key = lambda score: score['rouge2'].fmeasure, reverse = True)
    return scores[0]

def reformat_rouge(rouge_scores):
    rouge1 = {'prec': [], 'rec': [], 'f1': []}
    rouge2 = {'prec': [], 'rec': [], 'f1': []}
    for rscore in rouge_scores:
        rouge1['prec'].append(rscore['rouge1'].precision)
        rouge1['rec'].append(rscore['rouge1'].recall)
        rouge1['f1'].append(rscore['rouge1'].fmeasure)

        rouge2['prec'].append(rscore['rouge2'].precision)
        rouge2['rec'].append(rscore['rouge2'].recall)
        rouge2['f1'].append(rscore['rouge2'].fmeasure)
    
    return rouge1, rouge2

In [36]:
all_rouge_scores = data.progress_apply(get_max_rouge, axis = 1)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:06<00:00,  1.25s/it]


In [45]:
l_scores = [rscore[0] for rscore in all_rouge_scores]
r_scores = [rscore[1] for rscore in all_rouge_scores]

In [46]:
l_rscores1, l_rscores2 = reformat_rouge(l_scores)
r_rscores1, r_rscores2 = reformat_rouge(r_scores)

In [50]:
l_rscore1_f = {key: statistics.mean(val) for key, val in l_rscores1.items()}
l_rscore2_f = {key: statistics.mean(val) for key, val in l_rscores2.items()}
r_rscore1_f = {key: statistics.mean(val) for key, val in r_rscores1.items()}
r_rscore2_f = {key: statistics.mean(val) for key, val in r_rscores2.items()}

### Diversity (Self-Bleu) 

In [105]:
def get_self_bleu_min(split_preds):
    self_bleus = []
    for pred_pair in split_preds:
        bleu   = bleu_score.sentence_bleu([pred_pair[0]], pred_pair[1], weights = (1.0,))
        self_bleus.append(bleu)
    return min(self_bleus)

def get_pair_self_bleus(row):
    all_texts = row['all_texts']
    all_texts = re.sub(url_regex, '', all_texts)
    text_l = re.split(punct_regex, str(all_texts))
    
    text_pairs = [(text_l[i], text_l[j]) for i in range(len(text_l)) for j in range(i + 1, len(text_l))] 
    
    self_bleu_min = get_self_bleu_min(text_pairs)
    
    return self_bleu_min

In [106]:
self_bleus = data.progress_apply(get_pair_self_bleus, axis = 1)

100%|██████████████████████████████████████████████████████████████████████████████| 899/899 [4:53:53<00:00, 19.61s/it]


In [107]:
self_bleu = statistics.mean(self_bleus)

# Novel N-Grams 

In [89]:
def get_ngrams(text, ngram = 1):
    return zip(*[str(text).split()[i:] for i in range(ngram)])

def get_novel_ngrams(ref, source, ngrams = (1, 2, 3, 4)):
    diffs = []
    for ngram in ngrams:
        ref_ng = get_ngrams(ref, ngram = ngram)
        src_ng = get_ngrams(source, ngram = ngram)
        diff = set(ref_ng).difference(src_ng)
        diffs.append(len(diff))
    
    return diffs
        
def calc_novel_ngrams_row(row):
    text = row['all_texts']
    refl = row['left_sum']
    refr = row['right_sum']
    
    nov_ngramsl = get_novel_ngrams(refl, text)
    nov_ngramsr = get_novel_ngrams(refr, text)
    
    return nov_ngramsl, nov_ngramsr

In [90]:
novel_ngrams = data.progress_apply(calc_novel_ngrams_row, axis = 1)

100%|████████████████████████████████████████████████████████████████████████████████| 899/899 [00:12<00:00, 71.79it/s]


In [91]:
novel_l = [nngrams[0] for nngrams in novel_ngrams]
novel_r = [nngrams[1] for nngrams in novel_ngrams]

In [92]:
tot_ref_len_l = sum(data['left_sum'].str.split().apply(lambda tl: len(tl) if isinstance(tl, list) else 1))
tot_ref_len_r = sum(data['right_sum'].str.split().apply(lambda tl: len(tl) if isinstance(tl, list) else 1))

In [93]:
novel_l_dict = {1: 0, 2: 0, 3: 0, 4: 0}
novel_r_dict = {1: 0, 2: 0, 3: 0, 4: 0}

for nov_list in novel_l:
    for i, nov_cnt in enumerate(nov_list):
        novel_l_dict[i+1] += nov_cnt
for nov_list in novel_r:
    for i, nov_cnt in enumerate(nov_list):
        novel_r_dict[i+1] += nov_cnt

In [94]:
novel_l_dict = {key: (1. * val) / (tot_ref_len_l - (key - 1.)) for key, val in novel_l_dict.items()}
novel_r_dict = {key: (1. * val) / (tot_ref_len_r - (key - 1.)) for key, val in novel_r_dict.items()}

# All Scores 

In [103]:
print(f'                     LEFT      RIGHT')
print(f'Self-Bleu:           {self_bleu:.3f}')
# print(f'Bert Score (Prec):  {bprec:.3f}')
# print(f'            (Rec):  {brec:.3f}')
# print(f'             (F1):  {bf1:.3f}')
print(f'Rouge1 Score (Prec): {l_rscore1_f["prec"]:.3f}     {r_rscore1_f["prec"]:.3f}')
print(f'              (Rec): {l_rscore1_f["rec"]:.3f}     {r_rscore1_f["rec"]:.3f}')
print(f'               (F1): {l_rscore1_f["f1"]:.3f}     {r_rscore1_f["f1"]:.3f}')
print(f'Rouge2 Score (Prec): {l_rscore2_f["prec"]:.3f}     {r_rscore2_f["prec"]:.3f}')
print(f'              (Rec): {l_rscore2_f["rec"]:.3f}     {r_rscore2_f["rec"]:.3f}')
print(f'               (F1): {l_rscore2_f["f1"]:.3f}     {r_rscore2_f["f1"]:.3f}')
print(f'\n---Abstractiveness---')
print(f' 1-gram:           {novel_l_dict[1]:.4f}     {novel_r_dict[1]:.4f}     {(novel_l_dict[1] + novel_r_dict[1])/2:.4f}')
print(f' 2-gram:           {novel_l_dict[2]:.4f}     {novel_r_dict[2]:.4f}     {(novel_l_dict[2] + novel_r_dict[2])/2:.4f}')
print(f' 2-gram:           {novel_l_dict[3]:.4f}     {novel_r_dict[3]:.4f}     {(novel_l_dict[3] + novel_r_dict[3])/2:.4f}')
print(f' 2-gram:           {novel_l_dict[4]:.4f}     {novel_r_dict[4]:.4f}     {(novel_l_dict[4] + novel_r_dict[4])/2:.4f}')

                     LEFT      RIGHT
Self-Bleu:           0.999
Rouge1 Score (Prec): 0.303     0.401
              (Rec): 0.280     0.247
               (F1): 0.266     0.289
Rouge2 Score (Prec): 0.188     0.224
              (Rec): 0.156     0.116
               (F1): 0.158     0.144

---Abstractiveness---
 1-gram:           0.3049     0.3082     0.3066
 2-gram:           0.7637     0.7833     0.7735
 2-gram:           0.8498     0.8559     0.8528
 2-gram:           0.8178     0.8162     0.8170
