In [2]:
import json

# Claim ids (cId) that make up the human-evaluation test set.
human_test_set = [105,888,673,177,818,183,700,592,682,622,993,922,936,218,360,826,512,131,897,743]

persp_data_path = "../data/dataset/perspective_pool_v0.2.json"
claim_data_path = "../data/dataset/perspectrum_with_answers_v0.2.json"

# Context managers close the files as soon as parsing is done instead of
# leaving the handles to the garbage collector.
with open(persp_data_path, encoding="utf-8") as persp_file:
    persps = json.load(persp_file)
with open(claim_data_path, encoding="utf-8") as claim_file:
    claims = json.load(claim_file)

# pId -> perspective text
persp_dict = {p["pId"]: p["text"] for p in persps}
# cId -> full claim record
claim_dict = {c["cId"]: c for c in claims}

In [12]:
# Make gold annotation
# One record per test claim; each gold cluster is stored as
# ([(pid, perspective text), ...], stance label).
data = []

for cid in human_test_set:
    claim = claim_dict[cid]

    gold_clusters = [
        ([(pid, persp_dict[pid]) for pid in cluster['pids']], cluster['stance_label_3'])
        for cluster in claim['perspectives']
    ]

    data.append({
        "claim_id": cid,
        "claim_text": claim['text'],
        "gold_perspectives" : gold_clusters
    })

out_path = "../data/dataset/human_eval/equivalence_human_eval_gold.json"
# Leaving the block closes the file, so the JSON is fully flushed before any
# later cell reads it back.
with open(out_path, 'w') as gold_file:
    json.dump(data, gold_file)
    

In [16]:
# Make the table for stance annotation
import csv

out_path = "../data/dataset/human_eval/stance_human_eval.csv"

# NOTE: 'perpsective_title' is misspelled, but it is the column name written
# into the sheet; it is left as-is so existing consumers of the file keep working.
fieldnames = ['claim_id', 'claim_title', 'perspective_id', 'perpsective_title', 'gold_stance']

# newline='' is required by the csv module (otherwise rows get an extra '\r' on
# Windows); the with-block guarantees the rows are flushed and the file closed.
with open(out_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    # One row per perspective, labelled with the stance of its gold cluster.
    for claim in data:
        for cluster, stance in claim["gold_perspectives"]:
            for pid, title in cluster:
                writer.writerow({
                    "claim_id": claim["claim_id"],
                    "claim_title": claim["claim_text"],
                    "perspective_id": pid,
                    "perpsective_title": title,
                    "gold_stance": stance
                })


In [30]:
# Make spreadsheet for equivalence
import csv
import itertools

out_path = "../data/dataset/human_eval/equivalence_human_eval.csv"

# NOTE: the 'perpsective_title_*' spelling is the column name written into the
# sheet; it is left as-is so existing consumers of the file keep working.
fieldnames = ['claim_id', 'claim_title', 'perspective_id_1', 'perpsective_title_1', 'perspective_id_2', 'perpsective_title_2']

# newline='' is required by the csv module (otherwise rows get an extra '\r' on
# Windows); the with-block guarantees the rows are flushed and the file closed.
with open(out_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()

    for claim in data:
        # Flatten the gold clusters into a single list of (pid, title) pairs.
        all_p = [
            (pid, title)
            for cluster, _stance in claim["gold_perspectives"]
            for pid, title in cluster
        ]

        # Every unordered pair of distinct perspectives of the claim, once each.
        for (pid_1, title_1), (pid_2, title_2) in itertools.combinations(all_p, 2):
            writer.writerow({
                "claim_id": claim["claim_id"],
                "claim_title": claim["claim_text"],
                "perspective_id_1": pid_1,
                "perpsective_title_1": title_1,
                "perspective_id_2": pid_2,
                "perpsective_title_2": title_2,
            })

In [57]:
# Let's see how our two annotators, Rick and Daniel did!
import numpy as np
import pandas as pd
import json

def is_equivalent_in_gold(pid1, pid2, gold_clusters):
    """Tell whether two perspective ids share a gold cluster.

    Args:
        pid1: first perspective id.
        pid2: second perspective id.
        gold_clusters: iterable of clusters, each a container of perspective ids.

    Returns:
        True if at least one cluster contains both ids, False otherwise
        (including when ``gold_clusters`` is empty).
    """
    return any(pid1 in cluster and pid2 in cluster for cluster in gold_clusters)

def compute_p_r_f1(df, gold_clusters_by_claim=None):
    """Score an annotator's pairwise equivalence judgements against the gold clusters.

    A row counts as a predicted "equivalent" pair when its ``Equivalent`` cell
    is not null; the pair is a gold positive when both perspective ids fall in
    the same gold cluster of the row's claim.

    Args:
        df: DataFrame with the columns ``claim_id``, ``perspective_id_1``,
            ``perspective_id_2`` and ``Equivalent``.
        gold_clusters_by_claim: mapping claim_id -> list of clusters (each a
            list of perspective ids). Defaults to the notebook-level
            ``gold_cluster_dict`` so existing one-argument calls keep working.

    Returns:
        Tuple ``(precision, recall, f1)``. A metric whose denominator is zero
        (no predicted positives, no gold positives, or precision + recall == 0)
        is reported as 0.0 instead of raising ZeroDivisionError.

    Raises:
        KeyError: if a claim id in ``df`` is missing from the gold mapping.
    """
    if gold_clusters_by_claim is None:
        gold_clusters_by_claim = gold_cluster_dict

    tp = fp = fn = 0
    # The null mask is computed once for the whole column; the original
    # rebuilt df.isnull() on every row and indexed it with `.ix`, which no
    # longer exists in pandas >= 1.0.
    rows = zip(
        df['claim_id'],
        df['perspective_id_1'],
        df['perspective_id_2'],
        df['Equivalent'].notnull(),
    )
    for cid, pid1, pid2, pred_eq in rows:
        gold_eq = is_equivalent_in_gold(pid1, pid2, gold_clusters_by_claim[cid])

        if pred_eq and gold_eq:
            tp += 1
        elif pred_eq:
            fp += 1
        elif gold_eq:
            fn += 1
        # True negatives do not enter precision / recall, so they are not counted.

    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return (prec, rec, f1)

rick_result_path = '../data/dataset/human_eval/equivalence_rick.csv'
daniel_result_path = '../data/dataset/human_eval/equivalence_daniel.csv'
gold_path = '../data/dataset/human_eval/equivalence_human_eval_gold.json'

# We only got to row 718, so only rows with index < 718 are scored.
NUM_ANNOTATED_ROWS = 718

rdf = pd.read_csv(rick_result_path)
ddf = pd.read_csv(daniel_result_path)

with open(gold_path) as gold_file:
    gold_data = json.load(gold_file)

# claim_id -> list of gold clusters, each reduced to its perspective ids.
# The loop variable is deliberately not called `data`: that name holds the
# gold annotation list built earlier in the notebook and is read by the
# CSV-export cells, so rebinding it here would break re-running them.
gold_cluster_dict = {}
for gold_claim in gold_data:
    gold_cluster_dict[gold_claim['claim_id']] = [
        [persp[0] for persp in cluster[0]]
        for cluster in gold_claim["gold_perspectives"]
    ]

rdf = rdf[rdf.index < NUM_ANNOTATED_ROWS]
ddf = ddf[ddf.index < NUM_ANNOTATED_ROWS]


print(compute_p_r_f1(rdf))
print(compute_p_r_f1(ddf))


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


(0.8695652173913043, 0.8333333333333334, 0.851063829787234)
(0.8809523809523809, 0.7708333333333334, 0.8222222222222222)


In [66]:
# Evaluate the result on perspective relevance
import pandas as pd
import json
import statistics

persp_data_path = "../data/dataset/perspective_pool_v0.2.json"
claim_data_path = "../data/dataset/perspectrum_with_answers_v0.2.json"

# Context managers close the files as soon as parsing is done.
with open(persp_data_path, encoding="utf-8") as persp_file:
    persps = json.load(persp_file)
with open(claim_data_path, encoding="utf-8") as claim_file:
    claims = json.load(claim_file)

# pId -> perspective text
persp_dict = {p["pId"]: p["text"] for p in persps}
# cId -> full claim record
claim_dict = {c["cId"]: c for c in claims}

human_anno_table = "../data/dataset/human_eval/webapp_humanannotation.csv"

df = pd.read_csv(human_anno_table)

sihao_df = df[df.author == 'Sihao']

prec_list = []
rec_list = []

for _, row in sihao_df.iterrows():
    annotations = json.loads(row.annotation)
    clusters = [_c["pids"] for _c in claim_dict[row.claim_id]["perspectives"]]
    predicted_pids = [anno["pId"] for anno in annotations]

    # A gold cluster is covered when at least one annotated pId belongs to it.
    covered = [any(pid in cluster for pid in predicted_pids) for cluster in clusters]
    num_covered = sum(covered)

    # NOTE(review): `prec` divides by the number of gold clusters and `rec` by
    # the number of annotations, which looks swapped relative to the usual
    # definitions (precision over predictions, recall over gold). The values
    # are kept exactly as before so the reported numbers do not change; F1 is
    # symmetric and unaffected. Please confirm which labelling is intended.
    prec = num_covered / len(covered)

    if len(annotations) == 0:
        rec = 1
    else:
        rec = num_covered / len(annotations)

    prec_list.append(prec)
    rec_list.append(rec)

mean_prec = statistics.mean(prec_list)
mean_rec = statistics.mean(rec_list)
# F1 is reported as 0.0 when both means are 0 instead of dividing by zero.
if mean_prec + mean_rec:
    mean_f1 = 2 * mean_rec * mean_prec / (mean_prec + mean_rec)
else:
    mean_f1 = 0.0
print("Precision = {}".format(mean_prec))
print("Recall = {}".format(mean_rec))
print("F1 = {}".format(mean_f1))



[[777], [781]]
[[7158], [7157], [7153], [7156]]
[[5604]]
[[1314, 22061, 22062], [5825, 26012, 26013]]
[[6566], [6570], [6564, 6565]]
[[1363, 22149]]
[[5817, 26004, 26005], [2115, 5812], [5810], [5814], [2113, 5809, 22858, 22857], [5816, 26000, 26003], [5818, 26006, 26007], [5819, 26008, 26009], [5813], [25996, 25997, 25998], [2119, 22863, 22864], [2120], [2114, 22859, 22860], [2116, 22861, 22862], [2117]]
[[4931], [4929], [4924]]
[[5151], [5157, 25598, 25599], [5154, 25596, 25597], [5150], [5152], [5148], [5153, 25592, 25593, 25594, 25595], [5156], [5149]]
Precision = 0.6382716049382716
Recall = 0.8379629629629629
F1 = 0.7246110840089514


In [83]:
# Evaluate the result on perspective substantiation
# Relies on `sihao_df`, `claim_dict`, `json` and `statistics` from the
# perspective-relevance cell, which must have been run first.
prec_list = []
rec_list = []

for _, row in sihao_df.iterrows():
    annotations = json.loads(row.annotation)
    clusters = claim_dict[row.claim_id]["perspectives"]

    # (annotated evidences, gold evidences) for every gold cluster that
    # contains an annotated perspective.
    predictions = []
    for anno in annotations:
        pred_pid = anno["pId"]
        pred_evidences = json.loads(anno["evidences"])

        for _c in clusters:
            if pred_pid in _c["pids"]:
                predictions.append((pred_evidences, _c["evidence"]))

    tp = fn = 0
    for pred, gold in predictions:
        if len(pred) == 0:
            # Nothing was annotated although the gold cluster has evidence.
            if len(gold) > 0:
                fn += 1
        elif pred[0] in gold:
            # Only the first annotated evidence is scored.
            tp += 1

    # Claims without any matched perspective count as perfect precision.
    if len(predictions) == 0:
        prec_list.append(1)
    else:
        prec_list.append(tp / len(predictions))

    if tp + fn == 0:
        rec_list.append(1)
    else:
        rec_list.append(tp / (tp + fn))

print(prec_list, rec_list)

mean_prec = statistics.mean(prec_list)
mean_rec = statistics.mean(rec_list)
# F1 is reported as 0.0 when both means are 0 instead of dividing by zero.
if mean_prec + mean_rec:
    mean_f1 = 2 * mean_rec * mean_prec / (mean_prec + mean_rec)
else:
    mean_f1 = 0.0
print("Precision = {}".format(mean_prec))
print("Recall = {}".format(mean_rec))
print("F1 = {}".format(mean_f1))

                    

[1.0, 1.0, 0.0, 0.3333333333333333, 1.0, 1, 0.42857142857142855, 0.0, 0.14285714285714285] [1.0, 1.0, 1, 1.0, 1.0, 1, 0.5, 1, 0.5]
Precision = 0.544973544973545
Recall = 0.8888888888888888
F1 = 0.6756867568675686
