Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
author = "UVA QData Lab"

# The full version, including alpha/beta/rc tags
release = "0.3.6"
release = "0.3.7"

# Set master doc to `index.rst`.
master_doc = "index"
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ filelock
language_tool_python
lemminflect
lru-dict
datasets==2.2.2
datasets==2.4.0
nltk
numpy>=1.21.0
pandas>=1.0.1
scipy>=1.4.1
torch>=1.7.0,!=1.8
transformers>=3.3.0
transformers>=4.21.0
terminaltables
tqdm
word2number
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
]

extras["optional"] = [
"sentence_transformers>0.2.6",
"sentence_transformers==2.2.0",
"stanza",
"visdom",
"wandb",
"gensim==3.8.3",
"gensim==4.1.2",
]

# For developers, install development tools along with all optional dependencies.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
multilingual universal sentence encoder
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
"""
import tensorflow_text # noqa: F401

from textattack.constraints.semantics.sentence_encoders import SentenceEncoder
from textattack.shared.utils import LazyLoader

hub = LazyLoader("tensorflow_hub", globals(), "tensorflow_hub")
tensorflow_text = LazyLoader("tensorflow_text", globals(), "tensorflow_text")


class MultilingualUniversalSentenceEncoder(SentenceEncoder):
Expand Down
2 changes: 1 addition & 1 deletion textattack/models/tokenizers/glove_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(
word_list_file = tempfile.NamedTemporaryFile()
word_list_file.write(json.dumps(word_id_map).encode())

word_level = hf_tokenizers.models.WordLevel(
word_level = hf_tokenizers.models.WordLevel.from_file(
word_list_file.name, unk_token=str(unk_token)
)
tokenizer = hf_tokenizers.Tokenizer(word_level)
Expand Down
2 changes: 2 additions & 0 deletions textattack/shared/utils/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ def batch_model_predict(model_predict, inputs, batch_size=32):
"""
outputs = []
i = 0
# print("batch_model_predict", inputs.shape)
# print("inputs:", inputs)
while i < len(inputs):
batch = inputs[i : i + batch_size]
batch_preds = model_predict(batch)
Expand Down
24 changes: 11 additions & 13 deletions textattack/shared/word_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,9 +312,7 @@ class GensimWordEmbedding(AbstractWordEmbedding):
def __init__(self, keyed_vectors):
gensim = utils.LazyLoader("gensim", globals(), "gensim")

if isinstance(
keyed_vectors, gensim.models.keyedvectors.WordEmbeddingsKeyedVectors
):
if isinstance(keyed_vectors, gensim.models.KeyedVectors):
self.keyed_vectors = keyed_vectors
else:
raise ValueError(
Expand All @@ -335,11 +333,11 @@ def __getitem__(self, index):
"""
if isinstance(index, str):
try:
index = self.keyed_vectors.vocab.get(index).index
index = self.keyed_vectors.key_to_index.get(index)
except KeyError:
return None
try:
return self.keyed_vectors.vectors_norm[index]
return self.keyed_vectors.get_normed_vectors()[index]
except IndexError:
# word embedding ID out of bounds
return None
Expand All @@ -352,10 +350,10 @@ def word2index(self, word):
Returns:
index (int)
"""
vocab = self.keyed_vectors.vocab.get(word)
vocab = self.keyed_vectors.key_to_index.get(word)
if vocab is None:
raise KeyError(word)
return vocab.index
return vocab

def index2word(self, index):
"""
Expand All @@ -368,7 +366,7 @@ def index2word(self, index):
"""
try:
# this is a list, so the error would be IndexError
return self.keyed_vectors.index2word[index]
return self.keyed_vectors.index_to_key[index]
except IndexError:
raise KeyError(index)

Expand All @@ -386,8 +384,8 @@ def get_mse_dist(self, a, b):
try:
mse_dist = self._mse_dist_mat[a][b]
except KeyError:
e1 = self.keyed_vectors.vectors_norm[a]
e2 = self.keyed_vectors.vectors_norm[b]
e1 = self.keyed_vectors.get_normed_vectors()[a]
e2 = self.keyed_vectors.get_normed_vectors()[b]
e1 = torch.tensor(e1).to(utils.device)
e2 = torch.tensor(e2).to(utils.device)
mse_dist = torch.sum((e1 - e2) ** 2).item()
Expand All @@ -406,9 +404,9 @@ def get_cos_sim(self, a, b):
distance (float): cosine similarity
"""
if not isinstance(a, str):
a = self.keyed_vectors.index2word[a]
a = self.keyed_vectors.index_to_key[a]
if not isinstance(b, str):
b = self.keyed_vectors.index2word[b]
b = self.keyed_vectors.index_to_key[b]
cos_sim = self.keyed_vectors.similarity(a, b)
return cos_sim

Expand All @@ -421,7 +419,7 @@ def nearest_neighbours(self, index, topn, return_words=True):
Returns:
neighbours (list[int]): List of indices of the nearest neighbours
"""
word = self.keyed_vectors.index2word[index]
word = self.keyed_vectors.index_to_key[index]
return [
self.word2index(i[0])
for i in self.keyed_vectors.similar_by_word(word, topn)
Expand Down