In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', -1)

import numpy as np
import sklearn
from sklearn import metrics
import nltk 
from collections import Counter
import matplotlib.pyplot as plt
import itertools
import ast

nltk.download('stopwords')
from nltk.corpus import stopwords 
stop_words = set(stopwords.words('english'))

### BOOTLEG ###
# import import_ipynb
# import LoadEntityProfiles

[nltk_data] Downloading package stopwords to
[nltk_data]     /lfs/1/simran/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Bootleg utility functions:
# BY ALIAS: 
def get_candidates(alias):
    try:
        # To get qid candidates of an alias
        cands = LoadEntityProfiles.esp.get_qid_cands(alias)
        return cands
#         print(f"Cands {cands}")
#         print([es.get_title(qid) for qid in cands])
    except:
        pass

In [20]:
# Manual User Inputs
model_base = '/dfs/scratch1/simran/tacred/tacred-relation-bootleg/saved_results/09082020-bootleg_BASE_dev_rev_ent.csv'
model_alias = '/dfs/scratch1/simran/tacred/tacred-relation-bootleg/saved_results/09102020-095400_adding_manual_alias_dev_rev_ent.csv'


In [21]:
LABEL_TO_ID = {'no_relation': 0, 'per:title': 1, 'org:top_members/employees': 2, 'per:employee_of': 3, 
               'org:alternate_names': 4, 'org:country_of_headquarters': 5, 'per:countries_of_residence': 6, 
               'org:city_of_headquarters': 7, 'per:cities_of_residence': 8, 'per:age': 9, 
               'per:stateorprovinces_of_residence': 10, 'per:origin': 11, 'org:subsidiaries': 12, 
               'org:parents': 13, 'per:spouse': 14, 'org:stateorprovince_of_headquarters': 15, 'per:children': 16, 
               'per:other_family': 17, 'per:alternate_names': 18, 'org:members': 19, 'per:siblings': 20, 
               'per:schools_attended': 21, 'per:parents': 22, 'per:date_of_death': 23, 'org:member_of': 24, 
               'org:founded_by': 25, 'org:website': 26, 'per:cause_of_death': 27, 
               'org:political/religious_affiliation': 28, 'org:founded': 29, 'per:city_of_death': 30, 
               'org:shareholders': 31, 'org:number_of_employees/members': 32, 'per:date_of_birth': 33, 
               'per:city_of_birth': 34, 'per:charges': 35, 'per:stateorprovince_of_death': 36, 'per:religion': 37, 
               'per:stateorprovince_of_birth': 38, 'per:country_of_birth': 39, 'org:dissolved': 40, 
               'per:country_of_death': 41}

LABEL_LST = list(LABEL_TO_ID.keys())

STANFORD_NER_TYPES = ['DATE', 'LOCATION', 'MONEY', 'ORGANIZATION', 'PERCENT', 'PERSON', 'TIME']
punctuation = [',', ':', '.', ';', "'", ')', '(', "'s", '--', '-', '``', "''"]

In [22]:
# Load the model data 
df_results_base = pd.read_csv(model_base)
df_results_var = pd.read_csv(model_alias)

# add var predictions to df_results_base
df_results_base.rename(columns={'prediction':'prediction_base'}, inplace=True)
map_id_pred = {}
for ind, row in df_results_var.iterrows():
    map_id_pred[row['id']] = row['prediction']
df_results_base['prediction_var'] = df_results_base['id'].map(map_id_pred)

# error rates
df_errors_base = df_results_base[df_results_base['relation'] != df_results_base['prediction_base']]
df_errors_var = df_results_base[df_results_base['relation'] != df_results_base['prediction_var']]

print("FULL_model size: ", df_results_base.shape)
print("ERRS_model size: ", df_errors_base.shape)

print("FULL_model_var size: ", df_results_var.shape)
print("ERRS_model_var size: ", df_errors_var.shape)

FULL_model size:  (22631, 27)
ERRS_model size:  (2596, 27)
FULL_model_var size:  (22631, 26)
ERRS_model_var size:  (2595, 27)


In [23]:
# Utility functions: accepts a df with corrected slices, and outputs the predicted result
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_fscore_support as score
def score_corrections(df):
    import scorer
    scorer.score(df['relation_model'].tolist(), df['prediction_model'].tolist(), verbose=True)
    df_errs = df[df['relation_model'] != df['prediction_model']]
    print("Number of Errors: ", df_errs.shape[0])
# score_corrections(df_results)

In [24]:
# basic error rate by relation 
def error_rate_by_relation(df_r, key, df_e):
    relation_df = pd.DataFrame(columns=['relation','error_rate','error_count','total_count'])
    index = 0
    for k, v in LABEL_TO_ID.items():
        df_relation_tot = df_r[df_r[key] == k] # all rows with k = true trelation
        tot = df_relation_tot.shape[0] # number of examples with this true relation
    
        df_relation_err = df_e[df_e[key] == k] # error rows with k = true trelation
        err = df_relation_err.shape[0] # number of errors with this true relation
        error_rate = err/tot
    
        relation_df.loc[index] = pd.Series({'relation':k, 'error_rate':error_rate, 'error_count':err, 'total_count':tot})
        index += 1
    
    print(relation_df.sort_values('relation'))

In [25]:
df_errors_var.columns

Index(['obj', 'obj_mentions', 'subj_qids', 'prop_mentions', 'real_mentions',
       'subj_type', 'obj_type', 'id', 'subj_pos', 'subj_leng', 'obj_qids',
       'subj_mentions', 'qids', 'obj_leng', 'obj_ner', 'stanford_ner',
       'subj_ner', 'prediction_base', 'subj', 'example', 'mentions',
       'separation_dist', 'prop_ner', 'obj_pos', 'relation', 'num_ner',
       'prediction_var'],
      dtype='object')

In [33]:
sub_df = df_results_base[df_results_base['example'].str.contains('organisation')]
print(sub_df.shape)
sub_df = df_errors_base[df_errors_base['example'].str.contains('organisation')]
sub_df.shape
# print(sub_df)

(77, 27)


(5, 27)

In [34]:
sub_df = df_results_var[df_results_var['example'].str.contains('organisation')]
print(sub_df.shape)
sub_df = df_errors_var[df_errors_var['example'].str.contains('organisation')]
sub_df.shape

(77, 26)


(5, 27)

## Analysis by Relation

In [27]:
# alternate_names1 = {}

# for index, row in df_errors_var.iterrows():
#     if row['relation'] in 'org:alternate_names':
#         add = 0
#         tokens = row['example'].split(' ')
    
#         ss, se = row['subj_start'], row['subj_end']
#         subj = tokens[ss:se+1]
#         subj_qid = row['ent_id'][ss:se+1]
#         subj_ner = row['stanford_ner'][ss:se+1]
        
        
#         os, oe = row['obj_start'], row['obj_end']
#         obj = tokens[os:oe+1]
#         obj_qid = row['ent_id'][os: oe+1]
#         obj_ner = row['stanford_ner'][os: oe+1]
        
        
#         if len(subj) == 1 and subj_ner[0] == 'ORGANIZATION':
#             if subj_qid[0] != obj_qid[0]:
#                 add = 1
#         elif len(obj) == 1 and obj_ner[0] == 'ORGANIZATION':
#             if subj_qid[0] != obj_qid[0]:
#                 add = 1
        
#         if add == 1:
#             alternate_names1[row['id']] = {'subj':subj, 'subj_qid':subj_qid, 'obj':obj, 'obj_qid':obj_qid, 'example':tokens}
#             print(subj, subj_qid, obj, obj_qid)
#             print(row['example'])
#             print()
                    
# print(len(alternate_names1.items()))

In [None]:
#error_rate_by_relation(df_results, 'relation_model', df_errors)
relation_df = pd.DataFrame(columns=['relation','errrate_var','errcount_var', 'errrate_base','errcount_base','total_count'])
index = 0
for k, v in LABEL_TO_ID.items():
        df_relation_tot_var = df_results_var[df_results_var['relation'] == k] # all rows with k = true trelation
        tot_var = df_relation_tot_var.shape[0] # number of examples with this true relation
        if tot_var > 0:
    
            df_relation_err_var = df_errors_var[df_errors_var['relation'] == k] # error rows with k = true trelation
            err_var = df_relation_err_var.shape[0] # number of errors with this true relation
            error_rate_var = err_var/tot_var

            df_relation_tot_base = df_results_base[df_results_base['relation'] == k] # all rows with k = true trelation
            tot_base = df_relation_tot_base.shape[0] # number of examples with this true relation

            df_relation_err_base = df_errors_base[df_errors_base['relation'] == k] # error rows with k = true trelation
            err_base = df_relation_err_base.shape[0] # number of errors with this true relation
            error_rate_base = err_base/tot_base

            relation_df.loc[index] = pd.Series({'relation':k, 'errrate_var':error_rate_var, 'errcount_var':err_var, 'errrate_base':error_rate_base,'errcount_base': err_base, 'total_count':tot_var})
            index += 1
    
print(relation_df.sort_values('relation'))

# For the examples our model gets, but the baseline (vanilla) model misses, why does bootleg help?

We want to confirm that the improvement ties back to insights in bootleg - i.e., the mentions/types/relations in bootleg's database.

In [10]:
# get the set difference of errors
df_missed_by_variation = df_errors_var[~df_errors_var['id'].isin(df_errors_base['id'])]
df_missed_by_base = df_errors_base[~df_errors_base['id'].isin(df_errors_var['id'])]
print("The #examples df_missed_by_variation only are: ", df_missed_by_variation.shape[0])
print("The #examples df_missed_by_base only are: ", df_missed_by_base.shape[0])

The #examples df_missed_by_variation only are:  689
The #examples df_missed_by_base only are:  814


In [11]:
# THE DISTRIBUTION of RELATIONS WHEN MODEL IMPOVES OVER SPANBERT
print("THE DISTRIBUTION of RELATIONS WHEN BASE IMPROVES OVER VARIATION")
relations_errs_matchqids = []
for index, row in df_missed_by_variation.iterrows():
    relations_errs_matchqids.append(row['relation'])
print(Counter(relations_errs_matchqids))
print()

# THE DISTRIBUTION OF RELATIONS WHEN SPANBERT IMPROVES OVER MODEL
print(" THE DISTRIBUTION OF RELATIONS WHEN VARIATION IMPROVES OVER BASE")
relations_errs_matchqids = []
for index, row in df_missed_by_base.iterrows():
    relations_errs_matchqids.append(row['relation'])
print(Counter(relations_errs_matchqids))


THE DISTRIBUTION of RELATIONS WHEN BASE IMPROVES OVER VARIATION
Counter({'no_relation': 268, 'per:date_of_death': 57, 'per:title': 50, 'per:employee_of': 40, 'per:age': 21, 'per:cities_of_residence': 20, 'per:countries_of_residence': 18, 'org:top_members/employees': 18, 'org:country_of_headquarters': 18, 'org:members': 15, 'org:subsidiaries': 15, 'org:parents': 14, 'per:origin': 12, 'per:city_of_death': 12, 'per:charges': 11, 'per:spouse': 10, 'per:children': 9, 'org:city_of_headquarters': 9, 'org:number_of_employees/members': 9, 'per:cause_of_death': 9, 'per:stateorprovinces_of_residence': 8, 'per:religion': 7, 'per:stateorprovince_of_death': 6, 'org:stateorprovince_of_headquarters': 5, 'per:schools_attended': 4, 'org:shareholders': 4, 'per:parents': 4, 'org:alternate_names': 3, 'per:other_family': 3, 'per:stateorprovince_of_birth': 2, 'org:political/religious_affiliation': 2, 'per:city_of_birth': 2, 'per:date_of_birth': 1, 'per:alternate_names': 1, 'per:siblings': 1, 'per:country_of_

In [12]:
# Just for subj and obj
missed_words_lst = []
print("Subj obj missed on variation")
for index, row in df_errors_var.iterrows():
    subj = row['subj']
    missed_words_lst.append(subj)
    obj = row['obj']
    missed_words_lst.append(obj)
        
missed_words = Counter(missed_words_lst).most_common(60)
print(missed_words, '\n\n')

print("Subj obj missed on base")
missed_words_lst = []
for index, row in df_errors_base.iterrows():
    subj = row['subj']
    missed_words_lst.append(subj)
    obj = row['obj']
    missed_words_lst.append(obj)
        
missed_words = Counter(missed_words_lst).most_common(60)
print(missed_words)

Subj obj missed on variation
[("['his']", 206), ("['he']", 198), ("['her']", 51), ("['maria', 'kaczynska']", 47), ("['julius', 'baer']", 39), ("['access', 'industries']", 39), ("['burlington', 'northern', 'santa', 'fe', 'corp.']", 36), ("['she']", 36), ("['eco']", 34), ("['us']", 33), ("['global', 'infrastructure', 'partners']", 32), ("['arcandor']", 31), ("['mohammed', 'sayed', 'tantawi']", 31), ("['galleon', 'group']", 31), ("['him']", 30), ("['eta']", 30), ("['girija', 'prasad', 'koirala']", 29), ("['france']", 29), ("['millipore']", 28), ("['sasac']", 27), ("['thomas', 'more', 'law', 'center']", 27), ("['russia']", 27), ("['stuart', 'rose']", 26), ("['bipartisan', 'policy', 'center']", 26), ("['paul', 'gray']", 22), ("['rosoboronexport']", 21), ("['kissel']", 21), ("['election', 'complaints', 'commission']", 20), ("['public', 'library', 'of', 'science']", 20), ("['united', 'states']", 19), ("['abdul', 'aziz', 'al-hakim']", 19), ("['anwar', 'chowdhry']", 19), ("['nuclear', 'energy',

In [21]:
# for any word, not just subj and obj
missed_words_lst = []
for index, row in df_errors_var.iterrows():
    tokens = row['example'].split(' ')
    for token in tokens:
        if token not in stop_words and token not in punctuation:
            missed_words_lst.append(token)       
missed_words_var = Counter(missed_words_lst).most_common(200)
lst_missed_var = [tup[0] for tup in missed_words_var]

missed_words_lst = []
for index, row in df_errors_base.iterrows():
    tokens = row['example'].split(' ')
    for token in tokens:
        if token not in stop_words and token not in punctuation:
            missed_words_lst.append(token)
missed_words_base = Counter(missed_words_lst).most_common(200)
lst_missed_base = [tup[0] for tup in missed_words_base]

print("missed on var")
#print(missed_words_var, '\n\n')
print([tup for tup in missed_words_var if tup[0] not in lst_missed_base])
print("missed on base")
#print(missed_words_base)
print([tup for tup in missed_words_base if tup[0] not in lst_missed_var])

missed on var
[('merck', 55), ('saturday', 49), ('fe', 48), ('partners', 47), ('part', 46), ('afghanistan', 45), ('killed', 45), ('infrastructure', 43), ('announced', 43), ('music', 43), ('assets', 42), ('eco', 42), ('ali', 42), ('private', 41), ('buffett', 41), ('career', 41), ('business', 41), ('san', 41), ('companies', 40), ('age', 40), ('gadahn', 40), ('like', 40), ('brother', 39), ('&', 39)]
missed on base
[('several', 55), ('10', 48), ('writer', 46), ('worked', 46), ('afp', 45), ('air', 45), ('zapata', 44), ('poland', 44), ('indian', 44), ('management', 43), ('haig', 43), ('television', 43), ('later', 42), ('heart', 42), ('manager', 42), ('rana', 41), ('financial', 41), ('day', 41), ('general', 41), ('prasad', 41), ('saudi', 40), ('based', 40), ('official', 40), ('de', 40)]


In [23]:
# inspect errors for examples with a particular word
inspect_word = 'merck'
mispreds = []
truerels = []
for index, row in df_errors_var.iterrows():
    tokens = row['example'].split(' ')
    if inspect_word in tokens:
        mispreds.append(row['prediction_var']) 
        truerels.append(row['relation'])
mispreds_var = Counter(mispreds).most_common(20)
truerels_var = Counter(truerels).most_common(20)
print("missed on var")
print("PRED: ", mispreds_var, '\n')
print("TRUE: ", truerels_var)

mispreds = []
truerels = []
for index, row in df_errors_base.iterrows():
    tokens = row['example'].split(' ')
    if inspect_word in tokens:
        mispreds.append(row['prediction_var']) 
        truerels.append(row['relation'])
mispreds_base = Counter(mispreds).most_common(20)
truerels_base = Counter(truerels).most_common(20)
print("\n\n\nmissed on base")
print("PRED: ", mispreds_base, '\n')
print("TRUE: ", truerels_base)

missed on var
PRED:  [('no_relation', 18), ('org:parents', 9), ('org:top_members/employees', 5), ('org:subsidiaries', 3), ('org:shareholders', 2), ('org:city_of_headquarters', 1)] 

TRUE:  [('no_relation', 19), ('org:parents', 12), ('org:country_of_headquarters', 3), ('org:city_of_headquarters', 2), ('org:shareholders', 1), ('org:subsidiaries', 1)]



missed on base
PRED:  [('no_relation', 16), ('org:parents', 7), ('org:city_of_headquarters', 2), ('org:top_members/employees', 2), ('org:subsidiaries', 1), ('org:shareholders', 1)] 

TRUE:  [('no_relation', 14), ('org:parents', 9), ('org:city_of_headquarters', 3), ('org:shareholders', 1), ('org:subsidiaries', 1), ('org:country_of_headquarters', 1)]


In [20]:
# pronoun cooccurrence 
num_one_person_var = 0
subj_obj_pronoun = 0
pronouns = ['he', 'she', 'her', 'his', 'him']
for index, row in df_errors_base.iterrows():
    tokens = row['example'].split(' ')
    if any(pronoun for pronoun in pronouns if pronoun in tokens):
        ner_tags = row['stanford_ner']
        ner_tags = ast.literal_eval(ner_tags)
        if len([tag for tag in ner_tags if tag=="PERSON"]) == 1:
#             print(row[['example', 'subj', 'obj']])
#             print()
            num_one_person_var += 1
            if any(pronoun for pronoun in pronouns if pronoun in row['subj'] or pronoun in row['obj']):
                subj_obj_pronoun += 1
print("Of the error examples with pronouns, ", num_one_person_var, " have just one person ner tag in them.")
print("Of the error examples with pronouns, ", subj_obj_pronoun, " have subj or obj as a pronoun")

Of the error examples with pronouns,  249  have just one person ner tag in them.
Of the error examples with pronouns,  142  have subj or obj as a pronoun


### When the subj and obj qid match this tends to indicate alternate names --- bootleg doesn't do well on alternate names --- how does variation do?

In [None]:
count_errors = 0
relations_errs_matchqids = []
for index, row in df_errors_var.iterrows():
    # subj qid
    subj_qids_str = row['subj_qids']
    subj_qids = ast.literal_eval(subj_qids_str)
    subj_qid = subj_qids[0]
    
    # obj qid
    obj_qids_str = row['obj_qids']
    obj_qids = ast.literal_eval(obj_qids_str)
    obj_qid = obj_qids[0]
    
    if subj_qid == obj_qid and subj_qid != 'UNK':
        count_errors += 1
        relations_errs_matchqids.append(row['relation'])
count_errors
print(Counter(relations_errs_matchqids))

count_results = 0
relations_matchqids = []
for index, row in df_results_base.iterrows():
    # subj qid
    subj_qids_str = row['subj_qids']
    subj_qids = ast.literal_eval(subj_qids_str)
    subj_qid = subj_qids[0]
    
    # obj qid
    obj_qids_str = row['obj_qids']
    obj_qids = ast.literal_eval(obj_qids_str)
    obj_qid = obj_qids[0]
    
    if subj_qid == obj_qid and subj_qid != 'UNK':
        count_results += 1
        relations_matchqids.append(row['relation'])
        
print(Counter(relations_matchqids))
count_results

error_rate_orig = df_errors_var.shape[0]/df_results_base.shape[0]
error_rate_new = (df_errors_var.shape[0]-count_errors)/df_results_base.shape[0]
print("original number of errors: ", df_errors_var.shape[0])
print("total instances of two matching qids subj and obj: ", count_results)
print("corrected errors: ", count_errors)
print(error_rate_orig)
print(error_rate_new)

# Look errors based on bootleg relations between the sub/obj


In [None]:
# some constants
proper_noun = ['NNP', 'NNPS']
nonproper_noun = ['NN', 'NNS']
number_pos = ['CD']
nomention = ["['UNK']", "['UNK', 'UNK']", "['UNK', 'UNK', 'UNK']", "['UNK', 'UNK', 'UNK', 'UNK']", 
             "['UNK', 'UNK', 'UNK', 'UNK', 'UNK']"]
cols = ['example', 'relation','prediction_base', 'prediction_var', 'mentions', 'subj_pos', 'obj_pos', 'subj', 'obj', 'subj_mentions', 'subj_qids', 'obj_mentions', 'obj_qids', 'id']

In [None]:
def get_relations(subj_qids_str, obj_qids_str):
    subj_qids = ast.literal_eval(subj_qids_str)
    obj_qids = ast.literal_eval(obj_qids_str)
    subj_qid = subj_qids[0]
    obj_qid = obj_qids[0]
    
    rels = [LoadEntityProfiles.esp.get_relation_name(r) for r in LoadEntityProfiles.esp.get_all_relations(subj_qid, obj_qid)]
    rels = [LoadEntityProfiles.rel_to_name.get(rel, rel) for rel in rels]
    return rels

In [None]:
def get_related_entities(qid):
    # Get all connected qids for a given qid
    related_qids = LoadEntityProfiles.du.get_related_qids(qid, LoadEntityProfiles.esp)
    print(f"Related QIDs {related_qids}")
    print([LoadEntityProfiles.es.get_title(qid) for qid in related_qids])

    lst = [LoadEntityProfiles.es.get_title(qid) for qid in related_qids]

In [None]:
lst_pos_subj_var = []
lst_subj_nomention_var = {}
lst_pos_obj_var = []
lst_obj_nomention_var = {}

sub_df = df_missed_by_variation[cols]
for index, row in sub_df.iterrows():
    if any(pos in row['subj_pos'] for pos in proper_noun): # is the subj a proper noun?
        lst_pos_subj_var.append(row['subj'])
        if any(null in row['subj_mentions'] for null in nomention): # is the bootleg mention empty for this proper noun?
            lst_subj_nomention_var[row['id']] = row['subj']
            
    if any(pos in row['obj_pos'] for pos in proper_noun):
        lst_pos_obj_var.append(row['obj'])
        if any(null in row['obj_mentions'] for null in nomention):
            lst_obj_nomention_var[row['id']] = row['obj']
        
print("The number of proper noun subj in var errors are:", len(lst_pos_subj_var), "and obj are:", len(lst_pos_obj_var))
print("The number of proper noun subj in var errors that don't get a bootleg mention are:", len(lst_subj_nomention_var.keys()), "and obj are:", len(lst_obj_nomention_var.keys()))
print()

lst_pos_subj_var.extend(lst_pos_obj_var)
print("var DOES WORSE THAN BOOTLEG base ON THESE SUBJ/OBJ PROPER NOUNS, THIS MANY TIMES:")#print(len(lst_pos_subj_var))
worst_for_var = Counter(lst_pos_subj_var).most_common(50)
print(worst_for_var)
worst_for_var = [tup[0] for tup in worst_for_var]


In [None]:
# Inspecting examples where bootleg had any relation for the subj, obj pair

count = 0
for index, row in df_missed_by_variation.iterrows():
    if get_relations(row['subj_qids'], row['obj_qids']):
        count += 1
print("For ", count, " var errors, bootleg has *some* relation between the subj and obj")

count = 0
for index, row in df_missed_by_variation.iterrows():
    rels = get_relations(row['subj_qids'], row['obj_qids'])
    if rels and not any(rel for rel in rels if rel in row['relation']):
        print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_base', 'prediction_var', 'mentions']])
        print(f"Rels {rels}")
        print()
        count += 1

print("For ", count, " var errors, the existing bootleg relation is NOT a subset of the gold relation")


In [None]:
# Inspecting examples where bootleg had a relation for the subj, obj pair
# booterrors_with_relation_df = df_errors[df_errors['id'].isin(ids_missed_propernoun_with_bootrels)]
count = 0
for index, row in df_missed_by_variation.iterrows():
    rels = get_relations(row['subj_qids'], row['obj_qids'])
    if rels and any(rel for rel in rels if rel in row['relation']):
        print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_base', 'prediction_var', 'mentions']])
        print(f"Rels {rels}")
        print()
        count += 1
        
print("For ", count, " spanbert errors, the bootleg relation is a subset (or the same) as the gold relation")


In [None]:
# Inspecting examples where bootleg had a relation for the subj, obj pair
# booterrors_with_relation_df = df_errors[df_errors['id'].isin(ids_missed_propernoun_with_bootrels)]
count = 0
for index, row in df_missed_by_variation.iterrows():
    rels = get_relations(row['subj_qids'], row['obj_qids'])
    if rels and any(rel for rel in rels if rel in row['prediction_var']):
        print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_base', 'prediction_var', 'mentions']])
        print(f"Rels {rels}")
        print()
        count += 1
        
print("For ", count, " var errors, the bootleg relation is a subset (or the same) as the var prediction")


### FOR EXAMPLES WHERE BASE IS BETTER THAN VAR

In [None]:
lst_pos_subj_model = []
lst_subj_nomention_model = {}
lst_pos_obj_model = []
lst_obj_nomention_model= {}

sub_df = df_missed_by_base[cols]
for index, row in sub_df.iterrows():
    if any(pos in row['subj_pos'] for pos in proper_noun): # is the subj a proper noun?
        lst_pos_subj_model.append(row['subj'])
        if any(null in row['subj_mentions'] for null in nomention): # is the bootleg mention empty for this proper noun?
            lst_subj_nomention_model[row['id']] = row['subj']
            
    if any(pos in row['obj_pos'] for pos in proper_noun):
        lst_pos_obj_model.append(row['obj'])
        if any(null in row['obj_mentions'] for null in nomention):
            lst_obj_nomention_model[row['id']] = row['obj']
        
print("The number of proper noun subj in base (&not var) errors are:", len(lst_pos_subj_var), "and obj are:", len(lst_pos_obj_var))
print("The number of proper noun subj in base (&not var) errors that don't get a bootleg mention are:", len(lst_subj_nomention_var.keys()), "and obj are:", len(lst_obj_nomention_var.keys()))
print()

lst_pos_subj_model.extend(lst_pos_obj_model)
print("var DOES BETTER THAN base MODEL ON THESE SUBJ/OBJ PROPER NOUNS")#print(len(lst_pos_subj_spanbert))
worst_for_model = Counter(lst_pos_subj_model).most_common(50)
print(worst_for_model)
worst_for_model = [tup[0] for tup in worst_for_model]

In [None]:
sub_df = df_model_not_spanbert_errors[cols] #BOOTLEG ERRORS, and NOT BASELINE ERRORS
count_missed_propernoun = 0
count_missed_propernoun_with_bootrels = 0
ids_missed_propernoun_with_bootrels = []
for index, row in sub_df.iterrows():
    if any(pos in row['subj_pos'] for pos in proper_noun) or any(pos in row['obj_pos'] for pos in proper_noun):
#        print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_model', 'prediction_spanbert', 'mentions']])
        rels = get_relations(row['subj_qids'], row['obj_qids'])
        if rels:
            count_missed_propernoun_with_bootrels += 1
            ids_missed_propernoun_with_bootrels.append(row['id'])
#         print(f"Rels {rels}")
#         print()
        count_missed_propernoun += 1
        
print("Number of examples where bootleg had a relation for the subj, obj pair: ", count_missed_propernoun_with_bootrels)
print("Number of examples where subj and/or obj is a proper noun: ", count_missed_propernoun)


In [None]:
# Inspecting examples where bootleg had any relation for the subj, obj pair
# booterrors_with_relation_df = df_errors[df_errors['id'].isin(ids_missed_propernoun_with_bootrels)]
count = 0
for index, row in df_results.iterrows():
    if get_relations(row['subj_qids'], row['obj_qids']):
        count += 1
print("For ", count, " bootleg errors, bootleg has *some* relation between the subj and obj\n")


print("For these bootleg errors, the existing bootleg relation is NOT a string-subset of the gold relation")
count = 0
for index, row in df_results.iterrows():
    rels = get_relations(row['subj_qids'], row['obj_qids'])
    if rels and not any(rel for rel in rels if rel in row['relation']):
#         print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_model', 'prediction_spanbert', 'mentions']])
#         print(f"Rels {rels}")
#         print()
        count += 1

print("For ", count, " bootleg errors, the existing bootleg relation is NOT a subset of the gold relation")


In [None]:
# Inspecting examples where bootleg had a relation for the subj, obj pair
# booterrors_with_relation_df = df_errors[df_errors['id'].isin(ids_missed_propernoun_with_bootrels)]
count = 0
for index, row in df_errors.iterrows():
    rels = get_relations(row['subj_qids'], row['obj_qids'])
    if rels and any(rel for rel in rels if rel in row['relation']):
        print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_model', 'prediction_spanbert', 'mentions']])
        print(f"Rels {rels}")
        print()
        count += 1
        
print("For ", count, " bootleg errors, the bootleg relation is a subset (or the same) as the gold relation")


In [None]:
# Inspecting examples where bootleg had a relation for the subj, obj pair
# booterrors_with_relation_df = df_errors[df_errors['id'].isin(ids_missed_propernoun_with_bootrels)]
count = 0
for index, row in df_errors.iterrows():
    rels = get_relations(row['subj_qids'], row['obj_qids'])
    if rels and any(rel for rel in rels if rel in row['prediction_model']):
        print(row[['example', 'subj', 'obj', 'subj_qids', 'obj_qids', 'relation', 'prediction_model', 'prediction_spanbert', 'mentions']])
        print(f"Rels {rels}")
        print()
        count += 1
        
print("For ", count, " bootleg errors, the bootleg relation is a subset (or the same) as the bootleg prediction")
