### Konstantina Andronikou

## Comparison of Names as a Robustness Test

In [13]:
from allennlp_models.pretrained import load_predictor

In [14]:
import checklist
import logging 
import csv
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR
from checklist.expect import Expect

In [15]:
from checklist.pred_wrapper import PredictorWrapper

### Western Names

In [16]:
# Disable the loggers below so that loading the model does not emit their warnings.
for noisy_logger in (
    'allennlp.common.params',
    'allennlp.nn.initializers',
    'allennlp.modules.token_embedders.embedding',
    'urllib3.connectionpool',
    'allennlp.common.plugins',
    'allennlp.common.model_card',
    'allennlp.models.archival',
    'allennlp.data.vocabulary',
    'cached_path',
):
    logging.getLogger(noisy_logger).disabled = True

# Load the pretrained SRL predictor and try it on a single sentence.
srl_predictor = load_predictor('structured-prediction-srl')
output = srl_predictor.predict("Maria Smith likes to eat pizza")
output

{'verbs': [{'verb': 'likes',
   'description': '[ARG0: Maria Smith] [V: likes] [ARG1: to eat pizza]',
   'tags': ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'I-ARG1']},
  {'verb': 'eat',
   'description': '[ARG0: Maria Smith] likes to [V: eat] [ARG1: pizza]',
   'tags': ['B-ARG0', 'I-ARG0', 'O', 'O', 'B-V', 'B-ARG1']}],
 'words': ['Maria', 'Smith', 'likes', 'to', 'eat', 'pizza']}

In [17]:
def predict_srl(data):
    """Run the SRL predictor on every item of ``data``.

    Returns a list holding one ``srl_predictor.predict`` result per input
    item, in the same order as the input.
    """
    return [srl_predictor.predict(sentence) for sentence in data]

# Wrap predict_srl with checklist's PredictorWrapper; calling the wrapped
# function returns a (predictions, array) pair, as the next cell's output shows.
predict_and_conf = PredictorWrapper.wrap_predict(predict_srl)

In [18]:
# Smoke test: send one sentence through the wrapped predictor and display the result.
d = ["Maria Smith likes to eat pizza"]
pred = predict_and_conf(d)
pred

([{'verbs': [{'verb': 'likes',
     'description': '[ARG0: Maria Smith] [V: likes] [ARG1: to eat pizza]',
     'tags': ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'I-ARG1']},
    {'verb': 'eat',
     'description': '[ARG0: Maria Smith] likes to [V: eat] [ARG1: pizza]',
     'tags': ['B-ARG0', 'I-ARG0', 'O', 'O', 'B-V', 'B-ARG1']}],
   'words': ['Maria', 'Smith', 'likes', 'to', 'eat', 'pizza']}],
 array([1.]))

In [19]:
def format_srl(x, pred, conf, label=None, meta=None):
    """Render a prediction as the description string of its first predicate.

    Only ``pred`` is used; the other parameters are accepted but ignored.
    NOTE: a later cell defines ``format_srl`` again, and that later definition
    is the one in effect when the notebook is run top to bottom.
    """
    first_predicate = pred['verbs'][0]
    return first_predicate['description']

In [20]:
def get_arg(pred, arg_target='ARG0'):
    """Collect the words that the first predicate labels with ``arg_target``.

    Parameters
    ----------
    pred : dict
        One SRL prediction with a ``'verbs'`` list (each entry carrying a
        ``'tags'`` list) and a ``'words'`` list.
    arg_target : str
        Role label to look for, without the leading ``B-``/``I-`` marker.
        Both the full label (e.g. ``'ARGM-TMP'``) and its first component
        (e.g. ``'ARGM'``) are accepted.

    Returns
    -------
    set
        The words whose tag matches ``arg_target``. The set is empty when the
        prediction contains no predicates at all.
    """
    verbs = pred['verbs']
    if not verbs:
        # No predicate was detected, so no word can carry the requested role.
        return set()

    tags = verbs[0]['tags']
    matched = set()
    for tag, word in zip(tags, pred['words']):
        # Drop only the leading position marker so that multi-part labels
        # such as 'ARGM-TMP' stay intact; tags without '-' (e.g. 'O') are
        # compared as they are.
        _, sep, role = tag.partition('-')
        if not sep:
            role = tag
        # The first component of the label is still accepted as a match.
        if arg_target == role or arg_target == role.split('-')[0]:
            matched.add(word)
    return matched

In [21]:
def format_srl(x, pred, conf, label=None, meta=None):
    """Format one example for ``test.summary(format_example_fn=...)``.

    Returns the description string of the first predicate in ``pred``. When
    the prediction contains no predicates, the plain sentence (the words
    joined by spaces) is returned so that printing a summary cannot fail on
    such an example. Only ``pred`` is used; the other parameters are ignored.
    """
    verbs = pred['verbs']
    if not verbs:
        return ' '.join(pred.get('words', []))
    return verbs[0]['description']

In [22]:
def found_arg0_people(x, pred, conf, label=None, meta=None):
    """Check that the ARG0 words are exactly the templated first and last name.

    ``meta`` must provide ``'first_name'`` and ``'last_name'``. Returns True
    when the set of ARG0 words from ``get_arg`` equals those two names, and
    False otherwise.
    """
    expected_names = {meta['first_name'], meta['last_name']}
    return get_arg(pred, arg_target='ARG0') == expected_names


# Turn the per-example check into a checklist expectation (passed as `expect=` to MFT below).
expect_arg0 = Expect.single(found_arg0_people)

In [23]:

editor = Editor()

# Create the examples: no name lists are passed here, so {first_name} and
# {last_name} are filled from the Editor's built-in lexicons. meta=True keeps
# the chosen names for each example, which found_arg0_people reads back.
# With remove_duplicates=True the recorded run ended up with 999 examples.
t = editor.template('{first_name} {last_name} likes to eat pizza', meta=True, nsamples=1000, remove_duplicates = True)

# Show every field of the template result (this prints the full lists).
for k, v in t.items():
    print(k,v)

meta [{'first_name': 'Grace', 'last_name': 'Price'}, {'first_name': 'Martin', 'last_name': 'Ryan'}, {'first_name': 'Jimmy', 'last_name': 'Hart'}, {'first_name': 'Stephanie', 'last_name': 'Moore'}, {'first_name': 'Sandra', 'last_name': 'Kennedy'}, {'first_name': 'Billy', 'last_name': 'Jones'}, {'first_name': 'Lucy', 'last_name': 'Walker'}, {'first_name': 'Claire', 'last_name': 'Stone'}, {'first_name': 'Frances', 'last_name': 'Gray'}, {'first_name': 'Amy', 'last_name': 'King'}, {'first_name': 'Kim', 'last_name': 'Crawford'}, {'first_name': 'Julie', 'last_name': 'White'}, {'first_name': 'Nick', 'last_name': 'Bell'}, {'first_name': 'Amanda', 'last_name': 'Crawford'}, {'first_name': 'Alison', 'last_name': 'King'}, {'first_name': 'John', 'last_name': 'Martin'}, {'first_name': 'Kim', 'last_name': 'Davies'}, {'first_name': 'Edith', 'last_name': 'Hamilton'}, {'first_name': 'David', 'last_name': 'Butler'}, {'first_name': 'Wendy', 'last_name': 'Marshall'}, {'first_name': 'Ron', 'last_name': 'Cart

In [24]:
# Save the generated sentences; print() writes the repr of the whole t.data
# object on a single line. The 'dataset' directory must already exist.
with open('dataset/Western_names.txt', 'w') as f:
    print(t.data, file = f)

In [25]:
# Run the minimum-functionality test on the Western-name sentences and show the summary.
test = MFT(**t, name = 'detect_arg0name_default_position', expect=expect_arg0)
output = test.run(predict_and_conf)
test.summary(format_example_fn=format_srl)

# Export one row per test case: the tokenised sentence and the expectation result.
i = test.results['preds']
expect_for_i = test.results['expect_results']
# newline='' is what the csv module asks for (otherwise blank rows appear on
# Windows); the explicit encoding makes the file independent of the platform default.
with open('Output/Western_names.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    for result, exp in zip(i, expect_for_i):
        case = result['words']
        writer.writerow([case, exp])
        print(case, exp)

Predicting 999 examples
Test cases:      999
Fails (rate):    28 (2.8%)

Example fails:
[ARG0: Amanda] Wright [V: likes] [ARG1: to eat pizza]
----
[ARG0: Ray] Lewis [V: likes] [ARG1: to eat pizza]
----
[ARG0: Jeff] Lewis [V: likes] [ARG1: to eat pizza]
----
['Grace', 'Price', 'likes', 'to', 'eat', 'pizza'] [ True]
['Martin', 'Ryan', 'likes', 'to', 'eat', 'pizza'] [ True]
['Jimmy', 'Hart', 'likes', 'to', 'eat', 'pizza'] [ True]
['Stephanie', 'Moore', 'likes', 'to', 'eat', 'pizza'] [ True]
['Sandra', 'Kennedy', 'likes', 'to', 'eat', 'pizza'] [ True]
['Billy', 'Jones', 'likes', 'to', 'eat', 'pizza'] [ True]
['Lucy', 'Walker', 'likes', 'to', 'eat', 'pizza'] [ True]
['Claire', 'Stone', 'likes', 'to', 'eat', 'pizza'] [ True]
['Frances', 'Gray', 'likes', 'to', 'eat', 'pizza'] [ True]
['Amy', 'King', 'likes', 'to', 'eat', 'pizza'] [ True]
['Kim', 'Crawford', 'likes', 'to', 'eat', 'pizza'] [ True]
['Julie', 'White', 'likes', 'to', 'eat', 'pizza'] [ True]
['Nick', 'Bell', 'likes', 'to', 'eat', '

In [26]:
# the following piece of code was adapted from https://howtodoinjava.com/examples/python-print-to-file/
import sys

original_stdout = sys.stdout

# Mode 'a' appends, so every re-run of this cell adds another copy of the summary.
with open('Output/false_sentences_Western_names.txt', 'a', encoding='utf-8') as f:
    sys.stdout = f
    try:
        # summary() prints its report itself; wrapping it in print() would
        # additionally write the string "None" (its return value) to the file.
        test.summary(format_example_fn=format_srl)
    finally:
        # Always restore the standard output, even if summary() raises.
        sys.stdout = original_stdout

## Non-Western Names 

In [30]:
# Build the name lists from the Editor's Turkey lexicons (male + female first
# names, and last names). Only the first whitespace-separated token of each
# entry is kept, so every name is a single word.
first = [x.split()[0] for x in editor.lexicons.male_from.Turkey +  editor.lexicons.female_from.Turkey]
last = [x.split()[0] for x in editor.lexicons.last_from.Turkey]

In [31]:
# NOTE(review): `t` is only rebound to the Turkish-name template in the cell
# that follows this one (labelled In [29]). On a top-to-bottom run `t` is
# therefore still the Western template when this cell executes; the execution
# counts (30, 31, 29) suggest this cell was originally run after that one.
with open('dataset/Non-western_names.txt', 'w') as f:
    print(t.data, file = f)

In [29]:
# Sample the Turkish-name sentences.
# NOTE(review): unlike the Western template, this one ends with a period and
# does not pass remove_duplicates=True - confirm that the difference is intended.
t = editor.template("{first_name} {last_name} likes to eat pizza.", first_name=first, last_name=last, meta=True, nsamples=1000)

# Save the sentences right after sampling them, so the dataset file always
# holds exactly the data that is tested below (UTF-8 because the names
# contain non-ASCII characters).
with open('dataset/Non-western_names.txt', 'w', encoding='utf-8') as f:
    print(t.data, file = f)

# Run the minimum-functionality test and show the summary.
test = MFT(**t, expect=expect_arg0)
test.run(predict_and_conf)
test.summary(format_example_fn=format_srl)

# Export one row per test case: the tokenised sentence and the expectation result.
i = test.results['preds']
expect_for_i = test.results['expect_results']
# newline='' is what the csv module asks for; without an explicit UTF-8
# encoding the Turkish characters can fail to encode on platforms whose
# default encoding is not UTF-8.
with open('Output/Non-Western_names.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    for result, exp in zip(i, expect_for_i):
        case = result['words']
        writer.writerow([case, exp])
        print(case, exp)

Predicting 1000 examples
Test cases:      1000
Fails (rate):    34 (3.4%)

Example fails:
[ARGM-TMP: Ibrahim] [ARG0: Aksoy] [V: likes] [ARG1: to eat pizza] .
----
[ARG2: Mohammed] [V: Can] [ARG1: likes to eat pizza] .
----
[V: Pakize] [ARG1: Tarhan likes to eat pizza] .
----
['Ayşe', 'Erdoğan', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Deniz', 'Can', 'likes', 'to', 'eat', 'pizza', '.'] [False]
['Gülşen', 'Demirel', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Enver', 'Vural', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Betül', 'Dikmen', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Buse', 'Aktaş', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Adem', 'Aksu', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Ekrem', 'Durmaz', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Şevket', 'Gedik', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Ismail', 'Şen', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['Nur', 'Sezer', 'likes', 'to', 'eat', 'pizza', '.'] [ True]
['İnci', 'Yılmaz', 'likes', 'to'

In [32]:
# the following piece of code was adapted from https://howtodoinjava.com/examples/python-print-to-file/
import sys

original_stdout = sys.stdout

# Mode 'a' appends, so every re-run of this cell adds another copy of the summary.
# UTF-8 is set explicitly because the failing examples contain Turkish characters.
with open('Output/false_sentences_Non-Western_names.txt', 'a', encoding='utf-8') as f:
    sys.stdout = f
    try:
        # summary() prints its report itself; wrapping it in print() would
        # additionally write the string "None" (its return value) to the file.
        test.summary(format_example_fn=format_srl)
    finally:
        # Always restore the standard output, even if summary() raises.
        sys.stdout = original_stdout