In [1]:
import pickle
import warnings

import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn.metrics import r2_score

warnings.filterwarnings("ignore")
from functools import reduce

### This notebook evaluates the moral relevance and moral polarity scores generated by the framework of Xie et al. (2019) by comparing them to empirical moral association scores from the Small World of Words (SWOW) dataset.

In [2]:
# Load the pickled score tables and build `ts_df`, a single long table holding
# both score types, tagged by a `property` column.
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only point this at a file from a trusted source.
with open("./data/emnlp_scores_coha.p", "rb") as scores_file:
    files = pickle.load(scores_file)

# The labels written here are the same strings listed in `properties`, which the
# evaluation loop uses both to filter `ts_df` and to build the CSV file names.
df1 = files["moral_relevance"]
df1["property"] = "previous_link"

df2 = files["moral_polarity"]
df2["property"] = "polarity"

# Stack both tables, expose the word under the column name `words` (the name the
# evaluation CSVs are matched on), and drop the columns that are not needed.
ts_df = (
    pd.concat((df1, df2), ignore_index=True)
    .assign(words=lambda frame: frame["word"])
    .drop(columns=["word", "vector"])
)

# Logit transform log(p / (1 - p)) of the outputs, computed on the whole column
# at once. Outputs of exactly 0 or 1 become -inf / +inf rather than raising.
outputs = ts_df["outputs"].astype(float)
ts_df["outputs_z"] = np.log(outputs / (1 - outputs))

# Keep only rows from 1850 onwards and renumber the index.
ts_df = ts_df.loc[ts_df["year"] >= 1850].reset_index(drop=True)

In [3]:
# Run configuration. Most of these values are interpolated into the name of the
# evaluation CSV that is read for each property, so they must match the strings
# used when those files were written.

# --- what is evaluated ------------------------------------------------------
properties = ["previous_link", "polarity"]
section = "test"
data_name = "coha"

# --- model / training setup -------------------------------------------------
model_name = "bert-base-uncased"
loss_function_name = "likelihood_loss"
# NOTE: this rebinds `reduce`, shadowing the `functools.reduce` imported at the
# top of the notebook; the name is kept because the file-name template reads it.
reduce = "forward"
train_section = 0
train_sections = np.arange(5)  # 0..4; not referenced by the cells shown here

# --- graph construction -----------------------------------------------------
graph_strategy = "ppmi"
graph_version = 2
add_self_loops = True
fill = "add"

# --- data / token options ---------------------------------------------------
swow_version = 1
token_strategy = "frequency"

# Not referenced by the cells shown here.
baselines = False

In [4]:
# Directory prefix for the per-property evaluation CSVs. The trailing slash is
# required: file names are appended to this string by plain concatenation.
test_results_path = "./data/SWOW_prediction/eval/"

In [5]:
# Accumulator for the evaluation loop: one summary dict is appended per property.
results = []

In [6]:
# Compare the model outputs with the SWOW targets, one property at a time, and
# collect a summary row (n, Pearson r, p-value, R^2) per property in `results`.

# Only model outputs for this year are compared with the targets.
EVAL_YEAR = 1990

# Start from an empty list so that re-running this cell never duplicates rows.
results = []

for property_name in properties:
    # Evaluation CSV for this property; the file name encodes the run config.
    target_path = (
        test_results_path
        + f"{property_name}_{model_name}_{reduce}_{data_name}_{train_section}_{loss_function_name}_graph_{graph_strategy}_graph_version_{graph_version}_swow_version_{swow_version}_fill_{fill}_add_self_loops_{add_self_loops}_token_strategy_{token_strategy}_{section}.csv"
    )
    target_df = pd.read_csv(target_path)

    # Mean target per word, computed once instead of re-scanning `target_df`
    # for every row of `ts_df`.
    word_targets = target_df.groupby("words")["targets"].mean()

    in_scope = (
        ts_df["words"].isin(target_df["words"])
        & (ts_df["year"] == EVAL_YEAR)
        & (ts_df["property"] == property_name)
    )
    # `.assign` returns a new frame, so the target column is never written onto
    # a slice of `ts_df`.
    property_ts_df = ts_df.loc[in_scope].assign(
        targets=lambda frame: frame["words"].map(word_targets)
    )

    if len(property_ts_df) < 2:
        raise ValueError(
            f"Need at least 2 matching rows to correlate, got "
            f"{len(property_ts_df)} for property {property_name!r} "
            f"in year {EVAL_YEAR} ({target_path})"
        )

    r, p = pearsonr(property_ts_df["targets"], property_ts_df["outputs"])
    n = property_ts_df["words"].nunique(dropna=False)
    # NOTE(review): r2_score compares the raw values of `targets` and `outputs`,
    # so it is sensitive to the two being on different scales. `outputs_z` is
    # available on `ts_df` but is not used here -- confirm which column is the
    # intended prediction before interpreting R^2.
    r2 = r2_score(property_ts_df["targets"], property_ts_df["outputs"])
    results.append({"property": property_name, "n": n, "r": r, "p": p, "r2": r2})

In [7]:
# Convert the list of per-property dicts into a table (one row per property).
# NOTE: this rebinds `results` from a list to a DataFrame, so the evaluation
# loop cannot be re-run afterwards without first resetting `results` to a list.
results = pd.DataFrame(results)

In [8]:
# Show the per-property summary: n words, Pearson r, p-value and R^2.
results

Unnamed: 0,property,n,r,p,r2
0,previous_link,897,0.425972,7.613413e-41,-3.087629
1,polarity,795,0.479478,6.106861e-47,-305.787226
