In [1]:
import json

import numpy
import pandas as pd
import plotly.express as px

In [9]:
# constants

# Relation labels grouped by theme. Each key names a group; the value lists
# the relation labels that belong to that group.
critical_relation_names = {
    'residence': ['per:countries_of_residence', "per:cities_of_residence", "per:stateorprovinces_of_residence"],
    'headquarter': ["org:country_of_headquarters", "org:city_of_headquarters", "org:stateorprovince_of_headquarters"],
    'death': ["per:city_of_death", "per:stateorprovince_of_death", "per:country_of_death"],
    'birth': ["per:city_of_birth", "per:stateorprovince_of_birth", "per:country_of_birth", "per:origin"],
    'name': ['org:alternate_names', "per:alternate_names"],
    'religion': ["per:religion", "org:political/religious_affiliation"],
    'member': ["org:member_of", "org:top_members/employees", "per:employee_of"]
}

# Flat list of every relation label above, in group order.
# Built with a comprehension so no loop variables are left in the notebook
# namespace; in particular, binding `_` at top level would shadow IPython's
# "last output" variable.
critical_relations = [
    relation
    for group_relations in critical_relation_names.values()
    for relation in group_relations
]

# Model names used as keys in the predictions file.
models = ["LUKE", "SpanBERT", "SURE", "TYP_marker", "UniST", "NLI_w", "NLI_wo"]

# Adversarial-set file name -> human-readable "<strategy> <position>" label.
adv_strategy2names = {
    "controlled_tacred_test_sub1_obj.json": "same-role obj",
    "controlled_tacred_test_sub1_subj.json": "same-role subj",
    "controlled_tacred_test_sub1_subj+obj.json": "same-role subj+obj",
    "controlled_tacred_test_sub2_obj.json": "same-type obj",
    "controlled_tacred_test_sub2_subj.json": "same-type subj",
    "controlled_tacred_test_sub2_subj+obj.json": "same-type subj+obj",
    "controlled_tacred_test_sub3_obj.json": "diff-type obj",
    "controlled_tacred_test_sub3_subj.json": "diff-type subj",
    "controlled_tacred_test_sub3_subj+obj.json": "diff-type subj+obj",
    "controlled_tacred_test_sub4_obj.json": "masked obj",
    "controlled_tacred_test_sub4_subj.json": "masked subj",
    "controlled_tacred_test_sub4_subj+obj.json": "masked subj+obj"}

# Strategy token embedded in the file names -> strategy label.
sub_2_name = {
    'sub1': 'same-role',
    'sub2': 'same-type',
    'sub3': 'diff-type',
    'sub4': 'masked'
}

adv_strategies = list(adv_strategy2names.keys())
# Plain string: the original f-string had no placeholders.
results_file_path = './data/mapping_preds.json'

In [10]:
# config
# Name of the selected model; index 0 of `models` is "LUKE".
model = models[0]

In [11]:
def get_in_set_flow(critical_relation_name, model, adv_strategy, result,
                    min_samples=10, results_data=None):
    """Count samples whose predictions stay inside one relation group.

    A sample is counted when the model's prediction on the test set AND its
    prediction on the given adversarial set are both members of the relation
    group named by ``critical_relation_name``. The two predictions do not
    have to be equal; an earlier variant of this analysis (now removed)
    reported the percentage of counted samples whose prediction changed.

    Args:
        critical_relation_name: Key of ``critical_relation_names`` selecting
            the relation group.
        model: Model name used as key under ``test_preds`` and ``adv_preds``.
        adv_strategy: Adversarial file name of the form
            ``<a>_<b>_<c>_<subN>_<position>.json``; also used verbatim as key
            under ``adv_preds``.
        result: Nested dict updated in place:
            ``result[strategy_label][position][critical_relation_name]`` is
            set to the count, or to ``None`` when fewer than ``min_samples``
            samples qualify.
        min_samples: Minimum number of qualifying samples required to report
            a count. Defaults to 10, the threshold previously hard-coded.
        results_data: Optional already-loaded predictions mapping. When
            ``None`` (the default) the JSON at ``results_file_path`` is read,
            which is the original behaviour; pass the loaded mapping to avoid
            re-reading the file on every call.

    Returns:
        None. The outcome is written into ``result``.
    """
    # The name must split into exactly five '_'-separated fields; the last
    # two carry the strategy token ("sub1".."sub4") and the position.
    _, _, _, strategy_token, position = adv_strategy.replace('.json', '').split('_')
    strategy_label = sub_2_name[strategy_token]

    per_position = result.setdefault(strategy_label, {}).setdefault(position, {})

    # Distinct local name so the module-level `critical_relations` list is
    # not shadowed.
    group_relations = critical_relation_names[critical_relation_name]

    if results_data is None:
        # Context manager guarantees the file handle is closed.
        with open(results_file_path) as results_file:
            results_data = json.load(results_file)

    in_set_count = 0
    for sample in results_data.values():
        test_pred = sample['test_preds'][model]
        adv_pred = sample['adv_preds'][adv_strategy][model]
        if test_pred in group_relations and adv_pred in group_relations:
            in_set_count += 1

    # Too few qualifying samples are reported as missing rather than as a
    # small count.
    per_position[critical_relation_name] = (
        in_set_count if in_set_count >= min_samples else None
    )

In [12]:
# Fill `result` for every adversarial file and every relation group.
# The model comes from the config cell instead of a hard-coded 'LUKE', so
# changing `model` there is enough to rerun the analysis for another model.
result = {}
for adv_strategy in adv_strategies:
    for relation_group in sorted(critical_relation_names):
        get_in_set_flow(relation_group, model, adv_strategy, result)

In [13]:
# Relation-group key in `result` -> column label in the table.
# The mapping is explicit because the 'headquarter' group is published under
# the label 'headquarters'; previously this only lined up by position.
column_labels = {
    'residence': 'residence',
    'headquarter': 'headquarters',
    'death': 'death',
    'birth': 'birth',
    'name': 'name',
    'religion': 'religion',
    'member': 'member',
}

# One row per "<strategy> <position>", positions outermost so rows are
# grouped by position. Missing counts (None) become NaN and everything is
# stored as float, so the frame has a uniform numeric dtype.
table_rows = {}
for position in ('subj', 'obj', 'subj+obj'):
    for strategy in ('same-role', 'same-type', 'diff-type', 'masked'):
        counts = [result[strategy][position][group] for group in column_labels]
        table_rows[strategy + ' ' + position] = [
            float('nan') if count is None else float(count) for count in counts
        ]

# Build the frame once instead of enlarging it row by row with `df.loc`.
df = pd.DataFrame.from_dict(
    table_rows, orient='index', columns=list(column_labels.values())
)

In [14]:
# Persist the table as CSV; the row labels are written as the first column.
df.to_csv('./data/in_set_flow.csv')

In [15]:
# Heatmap of the table, each cell annotated with its value. The colour scale
# is hidden, so the title and axis labels have to make the figure readable
# on its own.
fig = px.imshow(
    df,
    text_auto=True,
    labels={'x': 'relation group', 'y': 'adversarial strategy', 'color': 'samples'},
)
fig.update_layout(
    title='Samples predicted inside the relation group on both test and adversarial sets',
    coloraxis_showscale=False,
)
fig.show()

In [16]:
# Display the table: rows are "<strategy> <position>", columns are relation groups.
df

Unnamed: 0,residence,headquarters,death,birth,name,religion,member
same-role subj,159.0,87.0,,25.0,85.0,10.0,362.0
same-type subj,156.0,92.0,,23.0,98.0,,386.0
diff-type subj,124.0,80.0,,22.0,81.0,13.0,335.0
masked subj,144.0,104.0,,24.0,111.0,,352.0
same-role obj,138.0,81.0,,30.0,85.0,,350.0
same-type obj,134.0,83.0,,11.0,79.0,,321.0
diff-type obj,45.0,35.0,,,43.0,,254.0
masked obj,39.0,25.0,,,133.0,,345.0
same-role subj+obj,118.0,61.0,,22.0,74.0,,335.0
same-type subj+obj,114.0,59.0,,11.0,69.0,,329.0


In [19]:
# Unweighted mean of the per-column means. Missing cells are skipped within
# each column, and a column with no values is skipped entirely.
# NOTE(review): this is not the mean over all cells; confirm which is intended.
# `numpy` is imported in the imports cell at the top of the notebook.
numpy.mean(df.mean())

98.47222222222221