In [None]:
import ssl
# NOTE(review): swaps the ssl module's default HTTPS context factory for the unverified one,
# so HTTPS connections that rely on the default context no longer verify certificates.
# Presumably a workaround for certificate errors while downloading data/models - confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
import OpenAttack
import numpy as np
import datasets
from textblob import TextBlob

class MyClassifier(OpenAttack.Classifier):
    """Victim model that labels a sentence by the sign of its TextBlob polarity."""

    def get_pred(self, input_):
        """Return the predicted label index for each sentence in ``input_``."""
        probabilities = self.get_prob(input_)
        return probabilities.argmax(axis=1)

    def get_prob(self, input_):
        """Return one one-hot row per sentence.

        A sentence whose TextBlob polarity is strictly positive gets ``[0, 1]``;
        every other sentence gets ``[1, 0]``.
        """
        rows = [
            np.array([0, 1]) if TextBlob(sentence).sentiment.polarity > 0 else np.array([1, 0])
            for sentence in input_
        ]
        return np.array(rows)

def dataset_mapping(x):
    """Rename a record's ``sentence`` / ``label`` fields to ``x`` / ``y``.

    The label is passed through unchanged (it is not thresholded here).
    """
    text, label = x["sentence"], x["label"]
    return dict(x=text, y=label)

def main():
    """Attack the TextBlob-based classifier with PWWS on the first 10 SST training samples."""
    # Load the "sst" slice, then rename its fields to the "x" / "y" keys via dataset_mapping.
    sst_samples = datasets.load_dataset("sst", split="train[:10]")
    sst_samples = sst_samples.map(function=dataset_mapping)

    # The custom classifier is the victim; PWWS (default parameters) is the attacker.
    victim_model = MyClassifier()
    pwws = OpenAttack.attackers.PWWSAttacker()

    # Metrics reported next to the attack results.
    eval_metrics = [
        OpenAttack.metric.SemanticSimilarity(),
        OpenAttack.metric.GrammaticalErrors(),
    ]
    evaluator = OpenAttack.AttackEval(pwws, victim_model, metrics=eval_metrics)

    # Launch the attack and print the per-sample visualisation.
    evaluator.eval(sst_samples, visualize=True)


if __name__ == "__main__":
    main()




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
it ' s the work of an artist , one whose    | Victim Model Queries:    221      
                                            | Semantic Similarity:     0.9866516
view of America , history and the           | Grammatical Errors:      8        
view of america , history and the           | Succeed:                 yes      
                                            |                                   
awkwardness of human life is generous and   |                                   
awkwardness of human life is generous and   |                                   
                                            |                                   
deep .                                      |                                   
deep .                                      |                                   
                                            |                                   
[32mLabel: 0 (100.00%) --> 1 (100.00%)[0m 

In [None]:
import OpenAttack
from textblob import TextBlob
from datasets import load_dataset

# 定义一个使用 TextBlob 的情感分类模型
class TextBlobClassifier(OpenAttack.Classifier):
    """Sentiment victim model that scores sentences with TextBlob polarity.

    Column 0 of every score row is the negative score, column 1 the positive score.
    """

    def __init__(self):
        pass

    def get_pred(self, input_):
        """Return the predicted label (0 = negative, 1 = positive) for each sentence.

        Classification attackers refuse a victim without this method
        (``needs victim to support `get_pred` method``).

        Args:
            input_: a list of sentences.

        Returns:
            A 1-D ``np.ndarray`` with the argmax of each row of ``get_prob``.
        """
        return self.get_prob(input_).argmax(axis=1)

    def get_prob(self, input_):
        """Return ``[negative, positive]`` scores for a batch of sentences.

        Args:
            input_: a list of sentences. A single bare string is still accepted
                and handled the way the original implementation did.

        Returns:
            An ``np.ndarray`` of shape ``(len(input_), 2)`` for a list of
            sentences, or the plain list ``[1 - p, p]`` for a bare string.
        """
        import numpy as np  # function-scope import: this cell does not import numpy itself

        if isinstance(input_, str):
            # Legacy single-sentence call: keep the original plain-list return value.
            return self._score(input_)

        rows = [self._score(sent) for sent in input_]
        # reshape keeps the (n, 2) contract even when the batch is empty.
        return np.array(rows, dtype=float).reshape(-1, 2)

    @staticmethod
    def _score(sent):
        """Map one sentence's polarity onto ``[1 - p, p]`` with ``p = (polarity + 1) / 2``."""
        prob = (TextBlob(sent).sentiment.polarity + 1) / 2
        return [1 - prob, prob]

def dataset_mapping(x):
    """Expose a review's body as ``x`` and its star rating as ``y``."""
    body = x["review_body"]
    stars = x["stars"]
    return {"x": body, "y": stars}

def main():
    """Attack the TextBlob classifier with TextFooler on 20 reviews of the ``zh`` subset.

    Star ratings are binarised for the attack: more than 3 stars becomes
    label 1, everything else label 0.
    """
    print("New Attacker")
    attacker = OpenAttack.attackers.TextFoolerAttacker()

    print("Building model")
    victim = TextBlobClassifier()

    print("Loading dataset")
    # NOTE(review): the "zh" subset is Chinese text, while TextBlob and the attacker's
    # default settings appear to target English - confirm this pairing is intended.
    dataset = load_dataset("amazon_reviews_multi", "zh", split="train[:20]").map(function=dataset_mapping)

    print("Start attack")
    # AttackEval consumes dict samples keyed by "x" / "y"; (text, label) tuples cannot be
    # indexed by key, so each sample is built as a dict.
    attack_data = [
        {"x": example["x"], "y": 1 if example["y"] > 3 else 0}
        for example in dataset
    ]
    attack_eval = OpenAttack.AttackEval(attacker, victim)
    attack_eval.eval(attack_data, visualize=True, progress_bar=True)


if __name__ == "__main__":
    main()


New Attacker
Building model
Loading dataset




Start attack





  0%|          | 0/20 [00:00<?, ?it/s][A[A[AERROR:OpenAttack.AttackEval:Exception when evaluate data ('本人账号被盗，资金被江西（杨建）挪用，请亚马逊尽快查实，将本人的200元资金退回。本人已于2017年11月30日提交退货申请，为何到2018年了还是没解决？亚马逊是什么情况？请给本人一个合理解释。', 0)
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/dist-packages/OpenAttack/attack_eval/utils.py", line 10, in attack_process
    adversarial_sample = attacker(victim, data)
  File "/usr/local/lib/python3.9/dist-packages/OpenAttack/attackers/classification.py", line 16, in __call__
    raise AttributeError("`%s` needs victim to support `%s` method" % (self.__class__.__name__, "get_pred"))
AttributeError: `TextFoolerAttacker` needs victim to support `get_pred` method
  0%|          | 0/20 [00:00<?, ?it/s]


TypeError: ignored

In [None]:
!pip install datasets
!pip install language_tool_python
!pip install strsimpy
!pip install openattack
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.12.0-py3-none-any.whl (474 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.6/474.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.7,>=0.3.0 (from datasets)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.5/212.5 kB[0m [31m29.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.3/134.3 kB[0m [31m21.0 MB/s[0m eta [36m0:00:00[0m
Collec

In [None]:
import ssl
# NOTE(review): swaps the ssl module's default HTTPS context factory for the unverified one,
# so HTTPS connections that rely on the default context no longer verify certificates.
# Presumably a workaround for certificate errors while downloading data/models - confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
import OpenAttack
import numpy as np
import datasets
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

class MyClassifier(OpenAttack.Classifier):
    """Victim model built on vaderSentiment's ``SentimentIntensityAnalyzer``."""

    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()

    def get_pred(self, input_):
        """Return the index of the larger score for each sentence in ``input_``."""
        probabilities = self.get_prob(input_)
        return probabilities.argmax(axis=1)

    def get_prob(self, input_):
        """Return one ``[1 - p, p]`` row per sentence.

        ``p`` is ``score_pos / (score_neg + score_pos)``, computed from the
        analyzer's "pos" and "neg" scores. The 1e-6 / 2e-6 terms avoid a
        division by zero when both scores are 0.
        """
        all_scores = (self.analyzer.polarity_scores(sentence) for sentence in input_)
        positive = [
            (scores["pos"] + 1e-6) / (scores["neg"] + scores["pos"] + 2e-6)
            for scores in all_scores
        ]
        # One two-element row per input sentence.
        return np.array([np.array([1 - p, p]) for p in positive])

def dataset_mapping(x):
    """Map a record to ``{"x": text, "y": label}`` with a binarised label.

    ``y`` is 1 when the record's ``label`` is greater than 0.5, otherwise 0.
    """
    text = x["text"]
    if x["label"] > 0.5:
        label = 1
    else:
        label = 0
    return {"x": text, "y": label}

def main():
    """Attack the VADER-based classifier with PWWS on the first 500 IMDB test samples."""
    # Load the "imdb" slice, then map it to the "x" / "y" keys via dataset_mapping.
    imdb_samples = datasets.load_dataset("imdb", split="test[:500]")
    imdb_samples = imdb_samples.map(function=dataset_mapping)

    # The custom classifier is the victim; PWWS (default parameters) is the attacker.
    victim_model = MyClassifier()
    pwws = OpenAttack.attackers.PWWSAttacker()

    # Metrics reported next to the attack results.
    eval_metrics = [
        OpenAttack.metric.SemanticSimilarity(),
        OpenAttack.metric.GrammaticalErrors(),
    ]
    evaluator = OpenAttack.AttackEval(pwws, victim_model, metrics=eval_metrics)
    evaluator.eval(imdb_samples, visualize=True)


if __name__ == "__main__":
    main()




[32mLabel: 1 (59.92%) --> 0 (52.27%)[0m            |                                   
                                            |                                   
I love sci - fi and am willing to put up    |                                   
i love sci - fi and am willing to put up    |                                   
                                            |                                   
with a lot . Sci - fi movies / TV are       |                                   
with a lot . sci - fi movies / tv are       |                                   
                                            |                                   
usually underfunded , under - appreciated   |                                   
usually underfunded , under - appreciated   |                                   
                                            |                                   
and misunderstood . I tried to like this ,  |                                   
and misunderstood .

KeyboardInterrupt: ignored

In [None]:
import OpenAttack as oa
import numpy as np
import datasets
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer


# configure access interface of the customized victim model by extending OpenAttack.Classifier.
class MyClassifier(oa.Classifier):
    """Customised victim model wrapping NLTK's ``SentimentIntensityAnalyzer``."""

    def __init__(self):
        # Fetch the VADER lexicon before the analyzer is built.
        nltk.download('vader_lexicon')
        self.model = SentimentIntensityAnalyzer()

    def get_pred(self, input_):
        """Return the index of the larger score for each sentence in ``input_``."""
        probabilities = self.get_prob(input_)
        return probabilities.argmax(axis=1)

    def get_prob(self, input_):
        """Return one ``[1 - p, p]`` row per input sentence.

        ``p`` is ``score_pos / (score_neg + score_pos)``, computed from the
        analyzer's "pos" and "neg" scores.
        """
        def positive_share(sentence):
            scores = self.model.polarity_scores(sentence)
            # The 1e-6 / 2e-6 terms avoid dividing by zero when both scores are 0.
            return (scores["pos"] + 1e-6) / (scores["neg"] + scores["pos"] + 2e-6)

        rows = []
        for share in map(positive_share, input_):
            rows.append(np.array([1 - share, share]))
        return np.array(rows)

def dataset_mapping(x):
    """Map a record to ``{"x": sentence, "y": label}`` with a binarised label.

    ``y`` is 1 when the record's ``label`` is greater than 0.5, otherwise 0.
    """
    sentence = x["sentence"]
    if x["label"] > 0.5:
        label = 1
    else:
        label = 0
    return {"x": sentence, "y": label}

# Load the first 500 test examples of the "sst" dataset and map them to "x" / "y" via dataset_mapping.
dataset = datasets.load_dataset("sst", split="test[:500]").map(function=dataset_mapping)
# Use the customized classifier defined above as the victim model.
victim = MyClassifier()
# Use VIPER as the attacker, initialized with its default parameters.
attacker = oa.attackers.VIPERAttacker()
# Prepare the evaluation; semantic similarity and grammatical errors are the extra metrics.
attack_eval = oa.AttackEval(attacker, victim,metrics = [
        oa.metric.SemanticSimilarity(),
        oa.metric.GrammaticalErrors()]
        )
# Launch the attacks and print the per-sample results.
attack_eval.eval(dataset, visualize=True)

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
                                            | Running Time:            0.032639 
                                            | Query Exceeded:          no       
Gollum ' s ` performance ' is incredible !  | Victim Model Queries:    121      
                                            | Succeed:                 no       
                                            |                                   
[31mLabel: 0 (50.00%) --> Failed![0m               |                                   
                                            | Running Time:            0.025245 
If this movie were a book , it would be a   | Query Exceeded:          no       
page - turner , you ca n ' t wait to see    | Victim Model Queries:    121      
what happens next .                         | Succeed:                 no       
                                            |                                   
[32mLabel: 0 (56.78%) --> 1 (100.0

{'Total Attacked Instances': 500,
 'Successful Instances': 403,
 'Attack Success Rate': 0.806,
 'Avg. Running Time': 0.002234895706176758,
 'Total Query Exceeded': 0.0,
 'Avg. Victim Model Queries': 27.456,
 'Avg. Semantic Similarity': 0.20180348603666712,
 'Avg. Grammatical Errors': 15.53349875930521}

In [None]:
import ssl
# NOTE(review): swaps the ssl module's default HTTPS context factory for the unverified one,
# so HTTPS connections that rely on the default context no longer verify certificates.
# Presumably a workaround for certificate errors while downloading data/models - confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
import OpenAttack as oa
import numpy as np
import datasets
import sentencepiece as spm


def dataset_mapping(x):
    """Map a record to ``{"x": sentence, "y": label}`` with a binarised label.

    ``y`` is 1 when the record's ``label`` is greater than 0.5, otherwise 0.
    """
    mapped = {"x": x["sentence"]}
    if x["label"] > 0.5:
        mapped["y"] = 1
    else:
        mapped["y"] = 0
    return mapped

def main():
    """Attack the ``BERT.SST`` victim with DeepWordBug on the first 100 SST training samples.

    The victim is placed on ``cuda:0`` when CUDA is available and on the CPU
    otherwise, so the cell also runs on hosts without a GPU.
    """
    import torch  # function-scope import: only needed here to probe for a GPU

    # Load the "sst" slice and map it to the "x" / "y" keys via dataset_mapping.
    sst_train = datasets.load_dataset("sst", split="train[:100]").map(function=dataset_mapping)

    # Fall back to the CPU instead of failing when no CUDA device is present.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    victim = oa.DataManager.loadVictim("BERT.SST").to(device)

    # DeepWordBug with its default parameters is the attacker.
    attacker = oa.attackers.DeepWordBugAttacker()

    # No extra metrics are requested for this evaluation.
    attack_eval = oa.AttackEval(attacker, victim)

    # Launch the attack and print the per-sample visualisation.
    attack_eval.eval(sst_train, visualize=True)


if __name__ == "__main__":
    main()




[31mLabel: 1 (98.17%) --> Failed![0m               |                                   
                                            |                                   
The Rock is destined to be the 21st Century | Running Time:            0.0016472
' s new `` Conan '' and that he ' s going   | Query Exceeded:          no       
to make a splash even greater than Arnold   | Victim Model Queries:    42       
Schwarzenegger , Jean - Claud Van Damme or  | Succeed:                 no       
Steven Segal .                              |                                   
                                            |                                   
[32mLabel: 1 (99.30%) --> 0 (65.90%)[0m            |                                   
                                            |                                   
The [1;31mgorgeously[0m elaborate continuation of `` |                                   
The [1;32mgorgeouѕly[0m elaborate continuation of `` |                        

In [None]:
import ssl
# NOTE(review): swaps the ssl module's default HTTPS context factory for the unverified one,
# so HTTPS connections that rely on the default context no longer verify certificates.
# Presumably a workaround for certificate errors while downloading data/models - confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context
import OpenAttack as oa
import numpy as np
import datasets
import sentencepiece as spm


def dataset_mapping(x):
    """Map a record to ``{"x": sentence, "y": label}`` with a binarised label.

    ``y`` is 1 when the record's ``label`` is greater than 0.5, otherwise 0.
    """
    sentence = x["sentence"]
    label = 1 if x["label"] > 0.5 else 0
    return dict(x=sentence, y=label)

def main():
    """Attack the ``BERT.SST`` victim with ``BERTAttacker`` on the first 100 SST training samples.

    The victim is placed on ``cuda:0`` when CUDA is available and on the CPU
    otherwise, so the cell also runs on hosts without a GPU.
    """
    import torch  # function-scope import: only needed here to probe for a GPU

    # Load the "sst" slice and map it to the "x" / "y" keys via dataset_mapping.
    sst_train = datasets.load_dataset("sst", split="train[:100]").map(function=dataset_mapping)

    # Fall back to the CPU instead of failing when no CUDA device is present.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    victim = oa.DataManager.loadVictim("BERT.SST").to(device)

    # BERTAttacker with its default parameters is the attacker.
    attacker = oa.attackers.BERTAttacker()

    # No extra metrics are requested for this evaluation.
    attack_eval = oa.AttackEval(attacker, victim)

    # Launch the attack and print the per-sample visualisation.
    attack_eval.eval(sst_train, visualize=True)


if __name__ == "__main__":
    main()




Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[32mLabel: 1 (98.17%) --> 0 (51.54%)[0m            |                                   
                                            |                                   
The Rock is [1;31mdestined[0m to be the 21st Century |                                   
the rock is [1;32mlooking [0m to be the 21st century |                                   
                                            |                                   
' s new `` Conan '' and that he [1;31m'[0m [1;31ms[0m [1;31mgoing[0m   |                                   
' s new `` conan '' and that he [1;32m,[0m [1;32mm[0m [1;32mgot  [0m   | Running Time:            0.18682  
                                            | Query Exceeded:          no       
to make a splash even [1;31mgreater[0m than Arnold   | Victim Model Queries:    108      
to make a splash even [1;32mless   [0m than arnold   | Succeed:                 yes      
                                            |                          