In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# One checkpoint name feeds both the tokenizer and the classifier so the two
# can never drift apart.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

In [None]:
from datasets import load_dataset
# Load the "SetFit/sst2" dataset. Later cells read its 'text' column and
# evaluate on its 'validation' and 'test' splits.
dataset = load_dataset("SetFit/sst2")

In [None]:
import math
import datasets
import numpy as np
import random
from transformers import Trainer
from datasets import load_metric
# Accuracy metric object; compute_metrics() calls metric.compute() on it.
# NOTE(review): `datasets.load_metric` is deprecated in recent `datasets`
# releases in favour of the separate `evaluate` package — confirm the pinned
# `datasets` version still provides it.
metric = load_metric("accuracy")

In [None]:
# Character alphabet used for random replacements/insertions. It is written as
# a single string for readability; note that '<' and '>' are not part of it.
chars = list(' !"#$%&\'()*+,-./0123456789:;=?@[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~')
# Id table: four special tokens followed by the alphabet above (a new list,
# so `chars` and `id_to_char` stay independent objects).
id_to_char = ['<PAD>', '<CLS>', '<SEP>', '<MASK>'] + chars
# Reverse lookup from character/token to its position in `id_to_char`.
char_to_id = dict(zip(id_to_char, range(len(id_to_char))))
# Parallel substitution tables used by refine_sentence(): every occurrence of
# rep[i] is replaced by tok[i], in list order. Order matters — longer patterns
# such as 'ã§' or 'mollã' must stay ahead of the bare 'ã' entry, otherwise the
# shorter pattern would consume them first.
# NOTE(review): the 'ã…' entries look like mis-decoded accented letters —
# presumably artefacts present in the dataset text; verify against the data.
rep = ['-lrb-', '-rrb-', 'ã§', 'ã¯', 'ã£', 'ã¨', 'ã»', 'ã¶', 'ã±', 'ã¢', 'ã-', 'ã¡', 'ã¦', 'ã³', 'ã©', 'ã¼', 'ü', 'û', 'ñ', 'ó', 'ô', 'ö', 'í', 'ï', 'mollã', 'jirã', 'ã', '\xad', '¼', '³', '¡', '¦', '\xa0', '¢', 'ç', '´', 'à', 'á', 'â', 'é', 'è', 'æ' ]
tok = ['('    , ')'    , 'c' , 'i' , 'a' , 'e' , 'u' , 'o' , 'n' , 'a' , 'i' , 'a' , 'ae', 'o' , 'e' , 'u' , 'u', 'u', 'n', 'o', 'o', 'o', 'i', 'i', 'molla', 'jiri', 'a', ''    , '' , '' , '' , '' , ''    , 'c', 'c', '' , 'a', 'a', 'a', 'e', 'e', 'ae']
# Guard against the two tables getting out of step when entries are edited.
assert(len(rep)==len(tok))

# Perturbation settings read as globals by gen_adv(). The values below are only
# initial defaults: the evaluation loop reassigns num_mod and global_seed
# before each run.
# num_replace is not referenced anywhere else in the visible notebook.
num_replace = 1
# Total number of character edits gen_adv() applies to each sentence.
num_mod = 1
# Seed gen_adv() passes to both `random` and `np.random` on every call.
global_seed = 1

def gen_adv(examples):
    """Perturb every sentence of a batch with random character-level edits.

    Each sentence is normalised with ``refine_sentence`` and then receives
    ``num_mod`` edits in total, split at random between deletions,
    replacements and insertions. Replacement and inserted characters are
    drawn uniformly from ``chars``.

    Reads the module-level ``num_mod``, ``global_seed`` and ``chars``.

    Args:
        examples: batch mapping whose ``'text'`` entry is an iterable of
            strings.

    Returns:
        ``{'text': [...]}`` with one perturbed string per input sentence.
    """
    ret = []
    # Both RNGs are re-seeded on every call, so each call (i.e. each batch when
    # used through ``map(..., batched=True)``) replays the same random stream.
    np.random.seed(global_seed)
    random.seed(global_seed)
    for text in examples['text']:
        text = list(refine_sentence(text))

        # Split the edit budget with three random weights normalised by ONE
        # shared total. The previous code normalised `ins` with the already
        # normalised `rpl`, so the shares could add up to more than 1; `rmv`
        # then became negative and `[:rmv]` deleted almost the whole sentence.
        rpl = random.random()
        ins = random.random()
        rmv = random.random()
        total = rpl + ins + rmv
        if total > 0:
            rpl = math.floor(num_mod * (rpl / total))
            ins = math.floor(num_mod * (ins / total))
        else:
            # All three draws were exactly 0.0: give the whole budget to removal
            # instead of dividing by zero.
            rpl = ins = 0
        # Removal takes whatever the two floors left over, so the three counts
        # always sum to num_mod.
        rmv = num_mod - rpl - ins

        # remove: go from the highest index down so that earlier deletions do
        # not shift the positions that are still waiting to be removed.
        for i in sorted(np.random.permutation(len(text))[:rmv], reverse=True):
            del text[i]

        # replace: positions are independent, order does not matter.
        for i in np.random.permutation(len(text))[:rpl]:
            text[i] = random.choice(chars)

        # insert: highest index first, for the same reason as removal.
        for i in sorted(np.random.permutation(len(text))[:ins], reverse=True):
            text.insert(i, random.choice(chars))

        ret.append(''.join(text))
    return {'text': ret}

def refine_sentence(sent):
    """Normalise a raw sentence before it is perturbed.

    Lower-cases the text, drops one trailing newline if present, and then
    applies the ``rep`` -> ``tok`` substitutions in list order (e.g. '-lrb-'
    and '-rrb-' become '(' and ')').

    Args:
        sent: the sentence as a string; may be empty.

    Returns:
        The normalised string.
    """
    sent = sent.lower()
    # Strip a single trailing newline. `endswith` is used instead of indexing
    # `sent[-1]` so that an empty string no longer raises IndexError.
    if sent.endswith('\n'):
        sent = sent[:-1]
    # Replace each pattern by its plain counterpart; the table order is
    # significant because some patterns are substrings of others.
    for f, t in zip(rep, tok):
        sent = sent.replace(f, t)
    return sent

def tokenize_function(examples):
    """Tokenize the 'text' column of a batch with the module-level tokenizer.

    Every sequence is padded to, and truncated at, 128 tokens.
    """
    encoding_options = dict(padding="max_length", max_length=128, truncation=True)
    return tokenizer(examples['text'], **encoding_options)

def compute_metrics(eval_pred):
    """Score predictions with the module-level ``metric``.

    Args:
        eval_pred: a pair that unpacks into (logits, labels).

    Returns:
        Whatever ``metric.compute`` returns for the arg-max class of each
        logit row compared against the labels.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted = np.argmax(scores, axis=-1)
    return metric.compute(predictions=predicted, references=gold)

# Trainer is built without explicit training arguments; in the visible notebook
# it is only used for `trainer.evaluate(...)`, with compute_metrics supplying
# the reported metric.
trainer = Trainer(model=model, compute_metrics=compute_metrics)

In [None]:
# Brute-force robustness sweep: for every edit budget 0..10 and every seed,
# regenerate the perturbed dataset from scratch and evaluate on both splits.
seeds = [31, 41, 59, 2, 65, 35, 897, 93, 2384, 626]
val_result = []
test_result = []
# The loop variables ARE the globals gen_adv() reads, so simply iterating over
# them reconfigures the perturbation for the next map() call.
for num_mod in range(11):
    val_rt, test_rt = [], []
    for global_seed in seeds:
        replaced_dataset = (
            dataset
            .map(gen_adv, batched=True)
            .map(tokenize_function, batched=True)
        )
        val_rt.append(trainer.evaluate(replaced_dataset['validation']))
        test_rt.append(trainer.evaluate(replaced_dataset['test']))
    # One row per edit budget, one entry per seed.
    val_result.append(val_rt)
    test_result.append(test_rt)

In [None]:
# Print result
def _print_accuracy_table(results):
    """Print per-seed accuracies and their mean for every edit budget.

    Args:
        results: list indexed by edit budget; each entry is a list of metric
            dicts (one per seed) containing an 'eval_accuracy' key.
    """
    for k, runs in enumerate(results):
        print(k)
        avg = 0.0
        for r in runs:
            print(r['eval_accuracy'], end=' ')
            avg += r['eval_accuracy']
        # Average over this row's own length. The test table used to divide by
        # the length of the validation row, which is only right by coincidence.
        print("AVG: ", avg/len(runs) if runs else float('nan'))


# Validation split first, then the test split, separated by a ruler.
_print_accuracy_table(val_result)
print("="*80)
_print_accuracy_table(test_result)