In [1]:
import pandas as pd

# Read the demonstration examples into a DataFrame. Later cells index its
# 'sentence' and 'label' columns by row to choose the example to attack.
demos_list = pd.read_csv("../../dataset/sentiment/8_random_demos_rt.csv")

In [7]:
import textattack
import transformers

# Load model, tokenizer, and model_wrapper.
# The checkpoint id is defined once so the model and its tokenizer are always
# loaded from the same place instead of from two separately typed strings.
MODEL_CHECKPOINT = "textattack/roberta-base-rotten-tomatoes"
model = transformers.AutoModelForSequenceClassification.from_pretrained(MODEL_CHECKPOINT)
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
# The wrapper is the object handed to the TextAttack goal function further down.
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

# Construct our four components for `Attack`
from textattack.constraints.pre_transformation import RepeatModification, StopwordModification, MaxModificationRate
from textattack.constraints.semantics import WordEmbeddingDistance
from textattack.transformations import WordSwapRandomCharacterSubstitution
from textattack.search_methods import GreedySearch, GeneticAlgorithm, BeamSearch

# Choose the demonstration to attack and read its fields a single time.
demo_index = 0
demo_sentence = demos_list['sentence'][demo_index]
demo_label = demos_list['label'][demo_index]

# Goal: drive the prediction to the opposite class, computed as 1 - label.
goal_function = textattack.goal_functions.TargetedClassification(
    model_wrapper,
    target_class=int(1 - demo_label),
)

# NOTE(review): the character swap used below yields non-words (the recorded
# output shows "frekh"); confirm WordEmbeddingDistance really constrains such
# replacements rather than letting them through.
constraints = [
    MaxModificationRate(max_rate=0.2),
    WordEmbeddingDistance(min_cos_sim=0.1),
]
transformation = WordSwapRandomCharacterSubstitution()
search_method = GreedySearch()

# Assemble the four components into the attack object.
attack = textattack.Attack(
    goal_function=goal_function,
    constraints=constraints,
    transformation=transformation,
    search_method=search_method,
)

# Attack the selected sentence, supplying its true label as ground truth.
attack_result = attack.attack(demo_sentence, int(demo_label))
print(attack_result)

Some weights of the model checkpoint at textattack/roberta-base-rotten-tomatoes were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
textattack: Unknown if model of class <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.targeted_classification.TargetedClassification'>.


1 (99%) --> 0 (75%)

the result is something quite fresh and delightful .

the result is something quite frekh and dzlightful .


In [12]:
from IPython.display import display, HTML

def highlight_differences(orig, perturbed):
    """Build an HTML snippet of ``perturbed`` with changed characters in red.

    The two strings are compared position by position. Characters that are
    equal are emitted as-is; characters that differ are wrapped in
    ``<font color="red">...</font>``. Every emitted character is HTML-escaped
    first, so text containing ``<``, ``>`` or ``&`` is displayed literally
    instead of being parsed as markup.

    Args:
        orig: The original string.
        perturbed: The modified string; must have the same length as ``orig``.

    Returns:
        A string starting with ``"perturbed text: "`` followed by the
        (escaped, highlighted) characters of ``perturbed``.

    Raises:
        AssertionError: If the two strings differ in length.
    """
    # Imported locally so the function works even if the cell holding the
    # notebook's other imports is not changed.
    import html

    assert len(orig) == len(perturbed), "Sentences must have the same length!"

    # Collect fragments and join once rather than growing a string in a loop.
    pieces = ["perturbed text: "]
    for o_char, p_char in zip(orig, perturbed):
        # quote=False: the character lands in element content, not in an
        # attribute value, so only <, > and & need escaping.
        safe_char = html.escape(p_char, quote=False)
        if o_char == p_char:
            pieces.append(safe_char)
        else:
            pieces.append(f'<font color="red">{safe_char}</font>')

    return "".join(pieces)

# Take the text before and after the attack from the result object.
orig_text, perturbed_text = (
    attack_result.original_text(),
    attack_result.perturbed_text(),
)

# Mark the characters that changed, then show the original as plain text and
# the perturbed version as rendered HTML.
highlighted = highlight_differences(orig_text, perturbed_text)

print('original text: {}'.format(orig_text))
display(HTML(highlighted))


original text: the result is something quite fresh and delightful .
