In [1]:
import time
import spacy
import lftk
import pandas as pd
from datasets import load_dataset

In [2]:
# Pipeline setup: spaCy produces the parsed docs that LFTK extracts features from.
# Any installed spaCy pipeline can be substituted for the small English model.
nlp = spacy.load("en_core_web_sm")

# Feature keys for LFTK's "readformula" family (the readability formulas).
# Other feature families can be searched the same way to widen the comparison.
metrics = lftk.search_features(
    family="readformula",
    return_format="list_key",
)

In [3]:
# Model-generated summaries; only the summary text column is read from the CSV.
data = pd.read_csv(
    "../output/baseline_test.csv",
    usecols=["summary_generated"],
)

# Human-written reference summaries: the "test" split of BillSum as a DataFrame.
gold = load_dataset("FiscalNote/billsum")["test"].to_pandas()

In [4]:
# returns a dictionary of readability scores from LFTK
def readability_score_single(text):
    """Score a single text with the readability formulas listed in ``metrics``.

    The text is parsed with the module-level spaCy pipeline ``nlp`` and the
    parsed doc is handed to an LFTK ``Extractor``.

    Args:
        text: Raw text to score.

    Returns:
        The result of ``Extractor.extract`` for ``metrics``. The rest of the
        notebook treats it as a dict of scores, one entry per feature key.
    """
    parsed = nlp(text)
    extractor = lftk.Extractor(docs=parsed)
    return extractor.extract(features=metrics)

def readability_eval(df, column, out_column="scores"):
    """Score every text in ``df[column]`` and store the results on ``df``.

    ``df`` is modified in place: the per-row results of
    ``readability_score_single`` are written to ``df[out_column]``. The
    elapsed time is printed once scoring finishes.

    Args:
        df: DataFrame holding the texts to score.
        column: Name of the column in ``df`` that contains the texts.
        out_column: Name of the column that receives the results. Defaults to
            ``"scores"``.

    Returns:
        None. The result lives on ``df``; nothing is returned so that a call
        placed at the end of a cell does not display the whole frame.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments that happen while scoring is running.
    started = time.perf_counter()
    df[out_column] = df[column].apply(readability_score_single)
    print(f"{time.perf_counter() - started} sec to run")

In [5]:
# replaces the dictionary column with exploded keys as new columns
def convert_cols(df, column):
    """Expand a column of dicts into one column per key and drop the original.

    Args:
        df: DataFrame holding the dict column. It is not modified.
        column: Name of the column in ``df`` whose values are dicts.

    Returns:
        A new DataFrame without ``column`` and with one added column per dict
        key (nested dicts are flattened by ``pd.json_normalize``). Row labels
        and row order are those of ``df``.
    """
    # join() aligns on index labels, so give the expanded frame df's own
    # labels. Without this, a df that has been filtered, sorted or otherwise
    # lacks a default 0..n-1 index could be joined to the wrong rows or NaN.
    expanded = pd.json_normalize(df[column].tolist())
    expanded.index = df.index
    return df.drop(columns=column).join(expanded)

In [6]:
# Score the generated summaries first, then the human-written references.
# Each call stores its results on the frame it is given and prints its run time.
readability_eval(df=data, column="summary_generated")
readability_eval(df=gold, column="summary")

81.62794089317322 sec to run
97.93178701400757 sec to run


In [7]:
# Expand each frame's "scores" dicts into one column per readability formula.
# NOTE: `gold` is rebound to its expanded form (which no longer has "scores"),
# so re-running this cell requires re-running the scoring cell first.
gold = convert_cols(df=gold, column="scores")
gen = convert_cols(df=data, column="scores")

In [20]:
# Side-by-side summary statistics of the readability scores:
# human-written ("gold") versus model-generated ("generated") summaries.
gold_stats = gold.describe()
gen_stats = gen.describe()
comparison = gold_stats.compare(
    gen_stats,
    align_axis=1,
    keep_equal=True,
    result_names=("gold", "generated"),
)

# Optional exports:
# gold_stats.to_csv("gold_describe.csv")
# comparison.to_csv("baseline_gold_compare.csv", index=False)
comparison

Unnamed: 0_level_0,fkre,fkre,fkgl,fkgl,fogi,fogi,smog,smog,cole,cole,auto,auto
Unnamed: 0_level_1,gold,generated,gold,generated,gold,generated,gold,generated,gold,generated,gold,generated
mean,12.950785,5.292479,22.548614,26.011575,28.701746,32.256271,11.587442,12.407292,15.162678,14.809797,26.354612,30.819341
std,28.561424,39.147077,10.030598,14.147657,10.336453,14.545976,3.764673,4.721686,2.216489,2.464433,12.738404,17.997384
min,-333.371,-197.615,6.311,6.217,10.15,8.969,0.0,0.0,7.802,6.261,5.72,6.407
25%,2.67,-8.085,16.792,17.666,22.815,23.667,9.329,9.329,13.66,13.118,19.089,20.248
50%,17.009,12.846,20.577,22.472,26.733,28.619,11.22,11.892,15.045,14.684,23.714,26.103
75%,29.321,29.003,25.451,29.742,31.892,36.0,13.398,14.75,16.556,16.32,29.975,35.103
max,80.098,81.956,154.161,97.404,163.263,105.737,35.676,36.579,25.251,24.759,194.066,122.723


In [15]:
# Summary statistics for the generated summaries alone (includes the row count).
generated_stats = gen.describe()

# Optional export:
# generated_stats.to_csv("gen_describe.csv")
generated_stats

Unnamed: 0,fkre,fkgl,fogi,smog,cole,auto
count,3269.0,3269.0,3269.0,3269.0,3269.0,3269.0
mean,5.292479,26.011575,32.256271,12.407292,14.809797,30.819341
std,39.147077,14.147657,14.545976,4.721686,2.464433,17.997384
min,-197.615,6.217,8.969,0.0,6.261,6.407
25%,-8.085,17.666,23.667,9.329,13.118,20.248
50%,12.846,22.472,28.619,11.892,14.684,26.103
75%,29.003,29.742,36.0,14.75,16.32,35.103
max,81.956,97.404,105.737,36.579,24.759,122.723
