In [1]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb
import numpy as np
# Seed NumPy's global RNG so the np.random.choice sampling done in later cells
# produces the same sentences on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Shared CheckList template editor used by the test cells below.
editor = Editor()

# test 1: Simple sentence with one predicate

In [58]:
# "<name> <verb> <name>." sentences. No first-name values are passed in here,
# so those two slots are resolved by the Editor itself.
simple_verbs = ['likes', 'hates', 'loves']
ret = editor.template(
    '{first_name1} {verb} {first_name2}.',
    verb=simple_verbs,
    remove_duplicates=True,
)

# Draw 100 of the generated sentences (np.random.choice samples with replacement by default).
test1 = np.random.choice(ret.data, size=100)

# test 2: Complex sentence with multiple predicates

In [None]:
# Multi-predicate template: the verb slots take their values from the lists
# below, while the two {mask} slots are left for the Editor to fill in.
ret = editor.template(
    '{first_name1} {verb_one} {mask}, {verb_two} {mask} and {verb_three} for {first_name2} to {verb_four}.',
    verb_one=['called', 'contacted', 'saw', 'heard'],
    verb_two=['called', 'contacted', 'saw', 'heard', 'picked'],
    verb_three=['waited'],
    verb_four=['eat', 'feel', 'contemplate', 'respond', 'react'],
    remove_duplicates=True,
)

# Keep the sample under a name (like test1 / test3) instead of only displaying it.
test2 = np.random.choice(ret.data, 100)
test2

# test 3: Simple passive voice with one predicate

In [3]:
def convert_to_passive(sentence):
    """Rewrite a simple "<agent> <verb> <patient>." sentence in the passive voice.

    The first three whitespace-separated tokens are taken as agent, verb and
    patient, and the result has the form "<patient> is <participle> by <agent>".
    A sentence-final period is kept if the input had one.

    The participle is built with a regular-verb heuristic: drop a trailing
    third-person "s", then add "d" after a stem ending in "e" and "ed"
    otherwise (likes -> liked, calls -> called). Irregular verbs and
    consonant doubling are not handled.

    Parameters
    ----------
    sentence : str
        Active-voice sentence with at least three tokens.

    Returns
    -------
    str
        The passive-voice sentence.

    Raises
    ------
    IndexError
        If the sentence has fewer than three tokens.
    """
    stripped = sentence.strip()
    # Remember the final period: it is removed for tokenizing and restored on return.
    has_period = stripped.endswith('.')

    # Tokenize the sentence
    tokens = stripped.strip('.').split()

    # Identify the agent, verb and patient
    agent = tokens[0]
    verb = tokens[1]
    patient = tokens[2]

    # Build the participle from the third-person singular form
    stem = verb[:-1] if verb.endswith('s') else verb
    participle = stem + ('d' if stem.endswith('e') else 'ed')

    # Create the passive voice sentence
    passive_sentence = f"{patient} is {participle} by {agent}"
    if has_period:
        passive_sentence += '.'

    return passive_sentence

In [17]:
# Apply convert_to_passive to every sentence sampled for test 1.
# NOTE(review): Perturb.perturb presumably keeps the original next to each
# perturbed sentence by default (keep_original=True), so entries would be
# [active, passive] pairs rather than bare passive sentences — confirm.
passive_result = Perturb.perturb(test1, convert_to_passive)
test3 = passive_result.data

# test 4.1: Misleading "by" with passive voice: location

In [13]:
# editor.related_words('He was seen by the river', 'river')
# After running this, manual selection was performed and put into the following list

In [37]:
# Hand-picked nouns for the "seen by the <place>" template.
places = [
    'river', 'sea', 'canal', 'water', 'vehicle',
    'wall', 'lake', 'ice', 'mountain', 'waterfall',
    'property', 'farm', 'association', 'range', 'reservoir',
]

In [38]:
# One sentence per entry in `places`; here "by" introduces a location, not an agent.
seen_by_template = 'He was seen by the {place}.'
test4_1 = editor.template(seen_by_template, place=places).data

In [None]:
# Show the generated "He was seen by the <place>." sentences.
test4_1

# test 4.2: Misleading "by" with passive voice: instrument

In [39]:
# editor.related_words('He was killed by a knife', 'knife')
# After running this, manual selection was performed and put into the following list

In [40]:
# Hand-picked nouns for the "killed by <instrument>" template.
instrument = [
    'bullet', 'shotgun', 'missile', 'gun', 'rifle',
    'shot', 'sword', 'slug', 'blade', 'firearm',
    'projectile', 'handgun', 'ball', 'dart', 'pistol',
    'dagger', 'spear', 'BB', 'bow', 'cannon',
]

In [41]:
# Here "by" introduces an instrument rather than an agent.
# NOTE(review): the `a:` prefix in {a:instrument} presumably makes the Editor
# prepend the matching indefinite article ("a"/"an") to each word — confirm.
killed_by_template = 'He was killed by {a:instrument}.'
test4_2 = editor.template(killed_by_template, instrument=instrument).data

In [42]:
# NOTE(review): this cell sits in the test 4.2 section, right after test4_2 is
# built, yet it displays test4_1 (the location sentences); test4_2 is never
# shown. Confirm whether test4_2 was meant here.
test4_1

['He was seen by the river.',
 'He was seen by the sea.',
 'He was seen by the canal.',
 'He was seen by the water.',
 'He was seen by the vehicle.',
 'He was seen by the wall.',
 'He was seen by the lake.',
 'He was seen by the ice.',
 'He was seen by the mountain.',
 'He was seen by the waterfall.',
 'He was seen by the property.',
 'He was seen by the farm.',
 'He was seen by the association.',
 'He was seen by the range.',
 'He was seen by the reservoir.']

# test5: Explainable ambiguity

# test6: Different context for the same word

In [55]:
# Leave the object of "run" as a {mask} so the Editor proposes the fillers.
run_template = 'I run the {mask}.'
ret = editor.template(run_template)

In [56]:
# Show every sentence generated from 'I run the {mask}.'.
ret.data

['I run the race.',
 'I run the course.',
 'I run the marathon.',
 'I run the route.',
 'I run the mile.',
 'I run the loop.',
 'I run the numbers.',
 'I run the test.',
 'I run the treadmill.',
 'I run the track.',
 'I run the miles.',
 'I run the hills.',
 'I run the walk.',
 'I run the dog.',
 'I run the distance.',
 'I run the show.',
 'I run the streets.',
 'I run the risk.',
 'I run the thing.',
 'I run the trial.',
 'I run the run.',
 'I run the line.',
 'I run the math.',
 'I run the stairs.',
 'I run the circuit.',
 'I run the relay.',
 'I run the races.',
 'I run the video.',
 'I run the leg.',
 'I run the program.',
 'I run the dogs.',
 'I run the class.',
 'I run the Marathon.',
 'I run the challenge.',
 'I run the plank.',
 'I run the rest.',
 'I run the bike.',
 'I run the Y.',
 'I run the shuttle.',
 'I run the section.',
 'I run the table.',
 'I run the tests.',
 'I run the legs.',
 'I run the routes.',
 'I run the simulation.',
 'I run the event.',
 'I run the loops.',

# test7: Different words in the same context

In [102]:
# NOTE(review): the template is a bare {mask} with no surrounding words, so the
# fillers are unconstrained (the recorded sample is mostly numbers, punctuation
# and section titles) — confirm this matches the "same context" goal of test 7.
bare_mask_template = '{mask}'
ret = editor.template(bare_mask_template, remove_duplicates=True)

# Draw 100 fillers (np.random.choice samples with replacement by default).
x = np.random.choice(ret.data, size=100)

In [103]:
# Number of distinct strings among the sampled fillers.
len(np.unique(x))

68

In [104]:
# Show the sampled fillers.
x

array(['2015', '.', '?', '7', 'xi', '***', '2018', 'Print', '1',
       'Advertisement', 'Contact', 'Japan', '4', 'Notes', ':', '*',
       'Education', '21', '...', '2013', '§', 'Discussion', ':', 'Canada',
       'California', 'California', 'xxx', '===', '!', '…', '7', '2',
       'Video', 'YouTube', 'Conclusion', '-', 'Advertisement',
       'advertisement', 'Comments', '-----', '5', 'Texas', '…', 'Content',
       '?', '–', 'Japan', '3', '15', 'I', '11', '-', '3', 'Canada', '13',
       'II', 'Contact', '10', '2014', 'Source', 'Photo', '------',
       '*****', 'Print', 'Canada', 'Notes', '19', 'California', '2', '10',
       'Notes', 'Conclusion', '7', 'Introduction', '9', 'References',
       '2015', '-', '—-', 'Links', '5', 'III', 'Abstract', '—', '9',
       '*****', 'California', 'Notes', 'Japan', 'Quotes', 'SPONSORED',
       '****', '--------', 'III', 'Texas', '9', 'D', 'A', '23', '3'],
      dtype='<U24')