In [1]:
import glob
import json
import altair as alt
import re
import os
import tqdm
from collections import Counter
import sklearn.metrics
import scipy.spatial.distance
import numpy as np
import math
import pandas as pd

In [2]:
data = {
    os.path.basename(f) : json.load(open(f))
    for f in glob.glob('./aggregated_files/*.json')
}
unist_mapping = json.load(open('./unist_mapping.json'))

In [3]:
def compute_micro_f1_tacred(targets, preds, no_relation_idx = ['no_relation', 'no relation']):
    guessed_by_relation = Counter()
    gold_by_relation = Counter()
    correct_by_relation = Counter()
    for row in range(len(preds)):
        gold = targets[row]
        guess = preds[row]

        if gold in no_relation_idx and guess in no_relation_idx:
            pass
        elif gold in no_relation_idx and guess not in no_relation_idx:
            guessed_by_relation[guess] += 1
        elif gold not in no_relation_idx and guess in no_relation_idx:
            gold_by_relation[gold] += 1
        elif gold not in no_relation_idx and guess not in no_relation_idx:
            guessed_by_relation[guess] += 1
            gold_by_relation[gold] += 1
            if gold == guess:
                correct_by_relation[guess] += 1

    prec_micro = 1.0
    if sum(guessed_by_relation.values()) > 0:
        prec_micro = float(sum(correct_by_relation.values())) / float(sum(guessed_by_relation.values()))
    recall_micro = 0.0
    if sum(gold_by_relation.values()) > 0:
        recall_micro = float(sum(correct_by_relation.values())) / float(sum(gold_by_relation.values()))
    f1_micro = 0.0
    if prec_micro + recall_micro > 0.0:
        f1_micro = (2.0 * prec_micro * recall_micro) / (prec_micro + recall_micro)
    return f1_micro

In [4]:
test_f1s = {m:0 for m in data['controlled_tacred_test_sub1_obj.json'][0]['results']}

for model in test_f1s:
    preds_test = json.load(open(f'./models/{model}/test.json'))
    pred = [x[0] for x in preds_test]
    gold = [x[1] for x in preds_test]
    test_f1s[model] = compute_micro_f1_tacred(pred, gold)
    
test_f1s

{'LUKE': 0.7205131943431987,
 'SpanBERT': 0.43071613459879204,
 'SURE': 0.7484536082474227,
 'TYP_marker': 0.7208764079617342,
 'UniST': 0.703357350912157,
 'NLI_w': 0.6861173453029816,
 'NLI_wo': 0.4272930648769575}

In [5]:
model = 'NLI_wo'
correct_rels = [x['correct_relation'] for x in data['test.json']]
predicted = [x['results'][model] for x in data['test.json']]
compute_micro_f1_tacred(predicted, correct_rels)

0.4272930648769575

In [6]:
no_relations = {
    m : {
        x : 0 for x in glob.glob(f'./models/{m}/*.json')
    }
    for m in data['controlled_tacred_test_sub1_obj.json'][0]['results']
}
l = len(predictions['SURE'])
for model in no_relations:
    for x in no_relations[model]:
        model_predictions = json.load(open(x))
        predicted_no_rels = len([sen for sen in model_predictions if sen[0]=='no_relation' or sen[0]=='no relation'])
        actual_no_rels = len([sen for sen in model_predictions if sen[1]=='no_relation' or sen[1]=='no relation'])
        diff = predicted_no_rels - actual_no_rels
        diff_perc = diff * 100 / actual_no_rels
        no_relations[model][x] = diff_perc
no_relations

NameError: name 'predictions' is not defined

In [None]:
controlled = 0
model = 'CNN'
for i, file in enumerate(sorted(list(data.keys()))):
    print(file)
    sentences = data[file] if 'controlled' in file else data[file]
    preds = [sentence['results'][model] for sentence in sentences]
    gold = [sentence['correct_relation'] for sentence in sentences]
    f1 = compute_micro_f1_tacred(gold, preds)*100
    print(f1)
    if 'controlled' in file:
        controlled += f1
    else:
        test_f1 = f1
    if (i+1) % 3 == 0:
        print('-----')
print('######')
avg_controlled = controlled / 12
loss = (test_f1 - avg_controlled)*100/test_f1
print(f"average controlled: {avg_controlled}")
print(f"percentage loss: {loss}")

In [None]:
controlled = 0
model = 'CNN'
test_f1 = 0
data_file = {file : json.load(open(file)) for file in glob.glob(f'./models/{model}/*')}
for i, file in enumerate(sorted(list(data_file.keys()))):
    sentences = data_file[file]
    preds = [sentence[0] for sentence in sentences]
    gold = [sentence[1] for sentence in sentences]
    f1 = compute_micro_f1_tacred(gold, preds)*100
    print(file, f1)
    if 'controlled' in file:
        controlled += f1
    else:
        test_f1 = f1
    if (i+1) % 3 == 0:
        print('-----')
print('######')
avg_controlled = controlled / 12
loss = (test_f1 - avg_controlled)*100/test_f1
print(f"average controlled: {avg_controlled}")
print(f"percentage loss: {loss}")

In [26]:
x = json.load(open('./models/LUKE/test.json'))
len(x)

15509

In [27]:
y = json.load(open('./models/LUKE/controlled_tacred_test_sub1_obj.json'))
len(y)

5102

In [28]:
15509 - 5102

10407

In [7]:
percentages = {
    m : {
        d : [0,0] for d in data
        if 'controlled' in d#(correctly predicted, actual total)
    }
    for m in data['controlled_tacred_test_sub1_obj.json'][0]['results']
}
predictions = {
    m : {}
    for m in data['controlled_tacred_test_sub1_obj.json'][0]['results']
}
heatmap_source = {
    m : {
    }
    for m in data['controlled_tacred_test_sub1_obj.json'][0]['results']
}
predictions['correct'] = {}

In [8]:
adversarial_wise_pred = {
    adv : {
        m : {}
        for m in data['controlled_tacred_test_sub1_obj.json'][0]['results']
    }
    for adv in data
    if 'controlled' in adv
}
all_rels_count = {}

In [9]:
for file in [x for x in data if 'controlled' in x]:
    for sentence in data[file]:
        gold_rel = sentence['correct_relation']
        if gold_rel not in all_rels_count:
            all_rels_count[gold_rel] = 0
        all_rels_count[gold_rel] += 1
        if gold_rel not in predictions['correct']:
            predictions['correct'][gold_rel] = 0
        for model in heatmap_source:
            if gold_rel not in heatmap_source[model]:
                heatmap_source[model][gold_rel] = {}
                for adv in adversarial_wise_pred:
                    adversarial_wise_pred[adv][model][gold_rel] = {'total':0, 'correct':0}
        predictions['correct'][gold_rel] += 1

In [10]:
for d in tqdm.tqdm([x for x in data if 'controlled' in x]):
    file = data[d]
    for sentence in file:
        correct_rel = sentence['correct_relation']
        possible_rels = sentence['relation_per_types']
        for model in sentence['results']:
            adversarial_wise_pred[d][model][correct_rel]['total'] += 1
            percentages[model][d][1] += 1
            predicted = sentence['results'][model]
            if predicted not in predictions[model]:
                predictions[model][predicted] = 0
            if predicted not in heatmap_source[model][correct_rel]:
                heatmap_source[model][correct_rel][predicted] = 0
            heatmap_source[model][correct_rel][predicted] += 1
            predictions[model][predicted] +=1
            if predicted in possible_rels:
                percentages[model][d][0] +=1
            if correct_rel == predicted:
                adversarial_wise_pred[d][model][correct_rel]['correct'] += 1      

100%|██████████| 12/12 [00:01<00:00,  7.15it/s]


In [16]:
models_to_name = {
 'LUKE' : 'LUKE',
 'NLI_wo' : 'NLI (w/o)',
 'SpanBERT' : 'SpanBERT',
 'TYP_marker' : 'TYP-marker',
 'CNN' : 'CNN',
 'SURE' : 'SuRE',
 'NLI_w' : 'NLI (w/)',
 'UniST' : 'UniST',
 'correct' : 'correct'
}
subst_to_name = {
    'sub1_subj' : 'same-role subj',
     'sub1_obj' : 'same-role obj',
    'sub1_subj+obj': 'same-role subj+obj',
     'sub2_subj' : 'same-type subj',
     'sub2_obj' : 'same-type obj',
     'sub2_subj+obj' : 'same-type subj+obj',
     'sub3_subj' : 'diff.-type subj',
     'sub3_obj' : 'diff.-type obj',
     'sub3_subj+obj' : 'diff.-type subj+obj',
     'sub4_subj' : 'masked subj',
     'sub4_obj' : 'masked obj',
     'sub4_subj+obj' : 'masked subj+obj'
}

In [17]:
import regex as re
from altair.expr import datum


pattern = r'sub[0-9]\_[a-z+]*'

percentages_values = [
    [
        models_to_name[m],
        subst_to_name[re.findall(pattern, d)[0]],
        round(percentages[m][d][0] / percentages[m][d][1] * 100, 2)]
    for m in percentages
    for d in percentages[m]
    if 'controlled' in d
]

correct_preds = {}
for k in predictions:
    correct_preds[models_to_name[k]] = predictions[k]
    

In [18]:
source = pd.DataFrame(percentages_values, columns=['model', 'substitution', 'percentage'])

In [19]:
sort = [
    'sub1_subj',
     'sub1_obj',
    'sub1_subj+obj',
     'sub2_subj',
     'sub2_obj',
     'sub2_subj+obj',
     'sub3_subj',
     'sub3_obj',
     'sub3_subj+obj',
     'sub4_subj',
     'sub4_obj',
     'sub4_subj+obj'
]
sort = [subst_to_name[s] for s in sort]

In [20]:
heatmap = alt.Chart(source).mark_rect().encode(
    y= alt.Y('model:N', title=""),
    x=alt.X('substitution:N', sort=sort, axis=alt.Axis(labelAngle=-45), title=""),
    color=alt.Color('percentage:Q', legend=None, scale=alt.Scale(scheme='lighttealblue'))
).properties(
    width=500,
    height=350
)

text = alt.Chart(source).mark_text(baseline='middle', fontSize=14).encode(
    y='model:N',
    x=alt.X('substitution:N', sort=sort, axis=alt.Axis(labelAngle=-45)),
    text='percentage:Q',
    color=alt.condition(datum['percentage'] < 30,
                        alt.value('grey'),
                        alt.value('white'))
)

rule1 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=2.2).encode(x=alt.value(125))
rule2 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=2.2).encode(x=alt.value(250))
rule3 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=2.2).encode(x=alt.value(375))
rule4 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(44))
rule5 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(87))
rule6 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(130))
rule7 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(43))
rule8 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(43))
rule9 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(43))
rule10 = alt.Chart(source).mark_rule(stroke='white',strokeWidth=0.05).encode(y=alt.value(43))

alt.layer(heatmap, text,rule1,rule2,rule3).configure_view(
    stroke='transparent'
).configure_axis(
    domainWidth=0.8
).configure_axis(
    labelFontSize=15,
    titleFontSize=12
)
#heatmap + text + rule1

In [19]:
all_relations = json.load(open('./rel2types.json'))
all_relations = list(all_relations.keys()) + ['no_relation']

total = sum([predictions['correct'][x] for x in predictions['correct']])
predictions_source = [
    [
        model,
        relation,
        predictions[model].get(relation,0),
        predictions['correct'].get(relation,0),
        (predictions[model].get(relation,0)/total)-(predictions['correct'].get(relation,0)/total)
    ]
                      for model in predictions
                      for relation in all_relations]
predictions_source = pd.DataFrame(predictions_source, columns=['model', 'relation', 'predicted', 'correct', 'difference'])

In [20]:
df = []
for model in heatmap_source:
    for relation in heatmap_source[model]: 
        total = sum([heatmap_source[model][relation][r2] for r2 in heatmap_source[model][relation]])
        for relation2 in all_relations:
            row = [model, relation, relation2, heatmap_source[model][relation].get(relation2, 0)/total*100]
            df.append(row)
        
df = pd.DataFrame(df, columns=['model','correct', 'predicted', 'count'])

In [98]:
all_charts =  [
    alt.Chart(df[df['model'] == m]).mark_rect().encode(
    x=alt.X('predicted:N', axis=alt.Axis(labelAngle=-45)),
    y='correct:N',
    color=alt.Color('count:Q',scale=alt.Scale(scheme='lightgreyteal'))
).properties(
    width=410,
    height=300,
    title=m
)
for m in list(set(df['model']))]

In [68]:
base = alt.Chart(df).mark_rect().encode(
    x='correct:N',
    y='predicted:N',
    color='count:Q'
).properties(
    width=160,
    height=160
)

In [69]:
models = list(set(predictions_source['model']))

In [70]:
def kl_divergence(p,q):
    return sum(p[i] * math.log2(p[i]/(q[i]+0.001)) for i in range(len(p)))

In [99]:
all_rels_sorted = [k[0] for k in sorted(all_rels_count.items(), key= lambda k:k[1], reverse=True)]

In [100]:
def plot_pred(model):
    source = predictions_source[
        ((predictions_source['model'] == model) |
        (predictions_source['model'] == 'correct')) &
        (predictions_source['relation'] != 'no_relation')
    ]
    source['model'] = ['actual' if x == 'correct' else 'predicted' for x in source['model']]
    p = list(source['correct'])
    q = list(source['predicted'])
    tots = sum(p)
    p = [n/tots for n in p]
    q = [n/tots for n in q]
    subt = round(scipy.stats.entropy(p,q), 2)
    chart = alt.Chart(source).mark_line().encode(
        y= alt.Y('predicted:Q', title=""),
        x = alt.X('relation:N', axis=alt.Axis(labelAngle=-45), title="", sort=all_rels_sorted),
        strokeDash = alt.StrokeDash('model:N', legend=alt.Legend(title='')),
        color=alt.Color('model:N'),
    ).properties(
        width=600,
        height=250,
        title={
            "text" : [model],
            #"subtitle" : [f"DIVERGENCE: {str(subt)}"]
        }
    )
    return chart

In [101]:
all_charts = [plot_pred(m) for m in models if m != 'correct']
alt.vconcat(
    (all_charts[1]|all_charts[2]),
    (all_charts[3]|all_charts[4]),
    (all_charts[5]|all_charts[6]),
    (all_charts[0]),
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [49]:
source_correct = predictions_source[
    (predictions_source['relation'] != 'no_relation') & (predictions_source['model']=='correct')
]
source_all = predictions_source[
    (predictions_source['relation'] != 'no_relation') & (predictions_source['model'] != 'correct')
]
chart = alt.Chart(source_correct).mark_line().encode(
    y= alt.Y('predicted:Q', title=""),
    x = alt.X('relation:N', axis=alt.Axis(labelAngle=-45), title="", sort=all_rels_sorted),
    #strokeDash = alt.StrokeDash('model:N', legend=alt.Legend(title='')),
    color=alt.Color('model:N'),
).properties(
    width=600,
    height=250,
    title={
        "text" : [model],
        #"subtitle" : [f"DIVERGENCE: {str(subt)}"]
    }
)
plot = alt.Chart(source_all).mark_circle(size=60).encode(
    x = alt.X('relation:N', axis=alt.Axis(labelAngle=-45), title="", sort=all_rels_sorted),
    y = alt.Y('predicted:Q', title=""),
    color=alt.Color('model:N')
)
chart+plot

In [57]:
div_avg = (.43 + .13 + .13 + .25 + .26 + .36 + .16) / 7
div_avg

0.24571428571428572

In [106]:
for adv in adversarial_wise_pred:
    for model in adversarial_wise_pred[adv]:
        for relation in adversarial_wise_pred[adv][model]:
            adversarial_wise_pred[adv][model][relation]['percentage'] = adversarial_wise_pred[adv][model][relation]['correct'] / adversarial_wise_pred[adv][model][relation]['total'] * 100


In [107]:
pattern = r'sub[0-9]\_[a-z+]*'

df = []
for adv in adversarial_wise_pred:
    adv_str = subst_to_name[re.findall(pattern,adv)[0]]
    for model in adversarial_wise_pred[adv]:
        for rel in adversarial_wise_pred[adv][model]:
            row = [adv_str, model, rel, adversarial_wise_pred[adv][model][rel]['percentage']]
            df.append(row)
df = pd.DataFrame(df, columns = ['adversarial','model', 'relation','percentage'])

In [108]:
all_charts =  [
    alt.Chart(df[df['adversarial'] == m]).mark_rect().encode(
    y=alt.Y('model:N', axis=alt.Axis(labelAngle=-0)),
    x=alt.X('relation:N', axis=alt.Axis(labelAngle=-45)),
    color=alt.Color('percentage:Q',scale=alt.Scale(scheme='lightgreyteal'))
).properties(
    width=500,
    height=100,
    title=m
)
for m in sort]

In [109]:
alt.vconcat(
    (all_charts[0]|all_charts[1]|all_charts[2]),
    (all_charts[3]|all_charts[4]|all_charts[5]),
    (all_charts[6]|all_charts[7]|all_charts[8]),
    (all_charts[9]|all_charts[10]|all_charts[11]),

)

In [54]:
heatmap_source

{'LUKE': {'no_relation': {'no_relation': 41122,
   'org:top_members/employees': 441,
   'per:cities_of_residence': 210,
   'per:title': 201,
   'per:employee_of': 526,
   'per:alternate_names': 44,
   'per:other_family': 83,
   'org:parents': 118,
   'per:children': 38,
   'per:spouse': 86,
   'org:alternate_names': 252,
   'per:countries_of_residence': 89,
   'org:city_of_headquarters': 120,
   'org:member_of': 3,
   'per:charges': 47,
   'per:siblings': 65,
   'org:shareholders': 42,
   'org:founded_by': 28,
   'per:parents': 51,
   'org:members': 12,
   'per:origin': 28,
   'per:cause_of_death': 28,
   'org:stateorprovince_of_headquarters': 42,
   'org:subsidiaries': 29,
   'per:stateorprovinces_of_residence': 63,
   'org:country_of_headquarters': 81,
   'per:age': 6,
   'per:city_of_death': 17,
   'per:schools_attended': 15,
   'org:political/religious_affiliation': 5,
   'org:dissolved': 20,
   'per:date_of_death': 50,
   'per:stateorprovince_of_death': 5,
   'per:religion': 12,
 

In [55]:
df_heatmap = []
for model in heatmap_source:
    for relation_g in heatmap_source[model]:
        for relation_p in heatmap_source[model][relation_g]:
            row = [model, relation_g, relation_p, heatmap_source[model][relation_g][relation_p]]
            df_heatmap.append(row)
df_heatmap = pd.DataFrame(df_heatmap, columns=['model','gold_rel', 'pred_rel', 'value'])

In [58]:
alt.Chart(df_heatmap.loc[(df_heatmap['model']=='LUKE') & (df_heatmap['gold_rel']!='no_relation') & (df_heatmap['pred_rel'] != 'no_relation')]).mark_rect().encode(
    x='gold_rel:O',
    y='pred_rel:O',
    color='value:Q'
)

### EXAMPLES

In [186]:
from numpy.random import default_rng

rng = default_rng()
numbers = rng.choice(len(data['controlled_tacred_test_sub1_obj.json']), size=100, replace=False)

In [187]:
numbers[5]

1035

In [193]:
def get_example(num):
    sentences = {}
    for adversarial in data:
        sentence = ' '.join(data[adversarial][num]['tokens'])
        sentences[adversarial] = {
            "text" : sentence,
            "possible" : data[adversarial][num]['relation_per_types'],
            'head' : (data[adversarial][num]['head'][0], data[adversarial][num]['head'][-1]),
            'tail' : (data[adversarial][num]['tail'][0], data[adversarial][num]['tail'][-1]),
            "correct" : data[adversarial][num]['correct_relation'],
            "results" : data[adversarial][num]['results'],
        }
    return sentences

In [194]:
examples = []
for num in numbers:
    try:
        example = get_example(num)['controlled_tacred_test_sub1_subj+obj.json']
        if example['results']['LUKE'] not in example['possible'] and example['results']['SURE'] != example['correct']:
            examples.append(example)
    except IndexError:
        pass

In [195]:
examples

[{'text': 'In late July , the Rosoboronexport and the United Nations Development Program agreed on a project to phase out energy consuming incandescent lamps and promote low energy - consuming bulbs in Iran .',
  'possible': ['org:parents',
   'org:country_of_headquarters',
   'org:member_of',
   'org:members',
   'org:subsidiaries'],
  'head': ('Rosoboronexport', 'ORGANIZATION'),
  'tail': ('Iran', 'COUNTRY'),
  'correct': 'org:country_of_headquarters',
  'results': {'LUKE': 'no_relation',
   'SpanBERT': 'no_relation',
   'SURE': 'no_relation',
   'TYP_marker': 'no_relation',
   'UniST': 'no_relation',
   'NLI_w': 'no_relation',
   'NLI_wo': 'no_relation'}},
 {'text': 'Upon landing , the boy identified by the foreign ministry only as Ruben from Sydney in the southern Netherlands and more fully as Ruben Xie Delong van Assouw by Dutch media , was taken by ambulance with two doctors and a nurse to hospital .',
  'possible': ['per:city_of_birth',
   'per:cities_of_residence',
   'per:city

In [10]:
def compute_perc_difference(x,y):
    diff = x-y
    perc = diff *100 /x
    return perc

In [11]:
results = [
    (53.8, 50.1),
    (50.5, 45.6),
    (40.1, 22.9),
    (33.5, 20.6)
]
diff_percs = [compute_perc_difference(x[0], x[1]) for x in results]

In [12]:
sum(diff_percs) / len(diff_percs)

24.495131120867924

In [13]:
diff = [x[0]-x[1] for x in results]
sum(diff)/len(diff)

9.674999999999999

In [14]:
40.1-22.9

17.200000000000003