# Substitute generation examples

In [1]:
import sys
from pathlib import Path

# Make the parent of the current working directory importable so that
# `lexsubgen` can be imported by the cells below.
working_dir = Path.cwd().resolve()
LEXSUBGEN_ROOT = str(working_dir.parent)

# Prepend only once, so re-running this cell does not grow sys.path.
if LEXSUBGEN_ROOT not in sys.path:
    sys.path.insert(0, LEXSUBGEN_ROOT)

In [2]:
from lexsubgen import SubstituteGenerator

In [3]:
# Directory holding the configuration files: "<parent of working dir>/configs".
CONFIGS_PATH = Path.cwd().resolve().parent.joinpath("configs")

In [4]:
import os

# GPU indices exposed to this process. These are hard-coded for one machine;
# adjust them to the devices available where the notebook is run.
VISIBLE_GPU_IDS = ("1", "2")

# Optional and currently disabled: pin the device enumeration order.
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

# NOTE(review): this presumably has to run before the first CUDA call to
# take effect -- confirm against the loading cell below.
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(VISIBLE_GPU_IDS)

In [5]:
# Build the substitute generator from the `xlnet_embs.jsonnet` config found
# under CONFIGS_PATH / "subst_generators" / "lexsub".
generator_config = CONFIGS_PATH / "subst_generators" / "lexsub" / "xlnet_embs.jsonnet"
sg = SubstituteGenerator.from_config(str(generator_config))

In [11]:
# Pre-tokenized input: tokens are separated by single spaces. The spelling
# 'commited' is kept verbatim because it is the target word of this example.
tokenized_text = (
    "The United Nations in East Timor has issued the first indictments of "
    "suspects in the atrocities commited during the territory 's 1999 vote "
    "to separate from Indonesia . Patricia Nunan reports from Jakarta ."
)
sentences = [tokenized_text.split()]
target_ids = [16]  # index of 'commited' in the token list

# One target index and one POS tag per sentence ("v" is presumably the verb tag).
substitutes, w2id = sg.generate_substitutes(sentences, target_ids, target_pos=["v"])

tokens = sentences[0]
print(f"Sentence: '{' '.join(tokens)}'", end="\n\n")
print(f"Target word: '{tokens[target_ids[0]]}'", end="\n\n")
print("Predicted substitutes", substitutes[0])

Sentence: 'The United Nations in East Timor has issued the first indictments of suspects in the atrocities commited during the territory 's 1999 vote to separate from Indonesia . Patricia Nunan reports from Jakarta .'

Target word: 'commited'

Predicted substitutes ['perpetrate', 'suffer', 'witness', 'unleash', 'conduct', 'report', 'experience', 'occur', 'sustain', 'see']


In [12]:
# Pre-tokenized input, one token per whitespace-separated item. The final
# "." token is included so the code agrees with the recorded output below
# ("I love cats .") and with the tokenization used in the previous example.
sentences = ["I love cats .".split()]
target_ids = [2]  # index of 'cats' in the token list

# One target index and one POS tag per sentence ("n" is presumably the noun tag).
substitutes, w2id = sg.generate_substitutes(sentences, target_ids, target_pos=["n"])

print(f"Sentence: '{' '.join(sentences[0])}'")
print()
print(f"Target word: '{sentences[0][target_ids[0]]}'")
print()
print("Predicted substitutes", substitutes[0])

Sentence: 'I love cats .'

Target word: 'cats'

Predicted substitutes ['dog', 'animal', 'pet', 'bird', 'horse', 'puppy', 'chicken', 'rat', 'wolf', 'cow']


# Lemmatization example

In [8]:
from lexsubgen.utils.lemmatize import nltk_lemmatize

In [9]:
# Inflected word forms to lemmatize; "v" is passed as the second argument
# (presumably the verb POS tag -- see the recorded output, e.g. 'is' -> 'be').
inflected_verbs = [
    'remains', 'join', 'enters', 'begin', 'turn',
    'develops', 'receives', 'creates', 'resembles', 'is',
]
nltk_lemmatize(inflected_verbs, "v")

['remain',
 'join',
 'enter',
 'begin',
 'turn',
 'develop',
 'receive',
 'create',
 'resemble',
 'be']

In [10]:
# A second batch of word forms, lemmatized with the same "v" argument
# (presumably the verb POS tag).
verb_forms = [
    'is', 'get', 'remains', 'enters', 'win',
    'join', 'play', 'begin', 'make', 'start',
]
nltk_lemmatize(verb_forms, "v")

['be',
 'get',
 'remain',
 'enter',
 'win',
 'join',
 'play',
 'begin',
 'make',
 'start']