In [1]:
import pandas as pd
from tqdm import notebook as tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import tensorflow as tf
import tensorflow_hub as hub
import torch
import math
import numpy as np

In [2]:
# GPU indices used by the two frameworks in this notebook:
#   PYTORCH_DEVICE -> GPT-2 model and its input tensors (f'cuda:{PYTORCH_DEVICE}')
#   TF_DEVICE      -> Universal Sentence Encoder (f'/device:GPU:{TF_DEVICE}')
PYTORCH_DEVICE = 0
TF_DEVICE = 1
# Derive the default CUDA device from the constant instead of repeating the
# literal, so changing PYTORCH_DEVICE above cannot leave the two out of sync.
torch.cuda.set_device(PYTORCH_DEVICE)

In [3]:
class GPT2Metric:
    """Perplexity-based metric backed by the pretrained "gpt2" causal LM.

    The model is moved to the CUDA device selected by ``PYTORCH_DEVICE``.
    """

    def __init__(self):
        target_device = f'cuda:{PYTORCH_DEVICE}'
        self._model = AutoModelForCausalLM.from_pretrained("gpt2")
        self._model.to(device=target_device)
        self._tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)

    def perplexity(self, text):
        """Return ``exp(loss)`` of the model on ``text``.

        The encoded text is truncated to ``model_max_length - 2`` tokens so
        that the BOS and EOS tokens wrapped around it still fit.
        """
        tok = self._tokenizer
        body_ids = tok.encode(text)[: tok.model_max_length - 2]
        token_ids = [tok.bos_token_id, *body_ids, tok.eos_token_id]
        batch = torch.tensor(token_ids).to(device=f'cuda:{PYTORCH_DEVICE}')
        with torch.no_grad():
            # First element of the model output is the language-modelling loss
            # because the inputs are also passed as labels.
            outputs = self._model(batch, labels=batch)
        return math.exp(outputs[0].item())

    def calc_metric(self, orig_text, new_text):
        """Return the relative perplexity change from ``orig_text`` to ``new_text``.

        Positive values mean ``new_text`` has higher perplexity than the original.
        """
        baseline = self.perplexity(orig_text)
        perturbed = self.perplexity(new_text)
        return (perturbed - baseline) / baseline
    

class USEMetric:
    """Sentence-similarity metric based on the Universal Sentence Encoder (v4).

    The encoder is loaded from TF-Hub and run on the GPU selected by
    ``TF_DEVICE``; the similarity itself is computed with torch.
    """

    def __init__(self):
        tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
        with tf.device(f'/device:GPU:{TF_DEVICE}'):
            self._model = hub.load(tfhub_url)

    def encode(self, orig_text, new_text):
        """Encode both texts in one batch and return the embeddings as a numpy array."""
        with tf.device(f'/device:GPU:{TF_DEVICE}'):
            return self._model([orig_text, new_text]).numpy()

    def get_angular_sim(self, emb1, emb2):
        """Return the angular similarity ``1 - acos(cos_sim) / pi`` of two 1-D tensors.

        The result is 1 for identical directions, 0.5 for orthogonal vectors
        and 0 for opposite directions.
        """
        cos_sim = torch.nn.CosineSimilarity(dim=0)(emb1, emb2)
        # Floating-point rounding can push the cosine marginally outside
        # [-1, 1] (typically for identical texts); acos would then return NaN
        # and poison every average computed from this metric.
        cos_sim = torch.clamp(cos_sim, -1.0, 1.0)
        return 1 - (torch.acos(cos_sim) / math.pi)

    def calc_metric(self, orig_text, new_text):
        """Return the angular similarity between the two texts as a Python float."""
        orig_emb, new_emb = self.encode(orig_text, new_text)
        orig_emb = torch.tensor(orig_emb)
        new_emb = torch.tensor(new_emb)
        sim = self.get_angular_sim(orig_emb, new_emb).item()
        return sim

class PercentageOfWordsChanged:
    """Percentage of whitespace-separated words that differ between two texts."""

    def calc_metric(self, orig_text, new_text):
        """Return the share of changed words, as a percentage of the original length.

        Words are compared position by position. If the two texts have a
        different number of words, every unmatched trailing word (on either
        side) counts as one change, so the comparison no longer fails on
        length mismatches. An original text without any words yields 0.0
        instead of dividing by zero.

        Note: because the comparison is positional, an inserted or deleted
        word shifts the alignment and makes the following words count as
        changed too.
        """
        orig_words = orig_text.split()
        new_words = new_text.split()
        if not orig_words:
            return 0.0
        words_changed = sum(
            orig_word != new_word
            for orig_word, new_word in zip(orig_words, new_words)
        )
        # zip() stops at the shorter text; account for the leftover words.
        words_changed += abs(len(orig_words) - len(new_words))
        return words_changed * 100 / len(orig_words)
    
class Evaluator:
    """Averages the USE, GPT-2 perplexity and words-changed metrics over an attack log."""

    def __init__(self):
        self.use_metric = USEMetric()
        self.gpt2_metric = GPT2Metric()
        self.percentageOfWordsChanged = PercentageOfWordsChanged()

    @staticmethod
    def _strip_brackets(text):
        """Remove every '[' and ']' character from ``text``."""
        return text.replace("[", "").replace("]", "")

    def evaluate(self, csv_file, all_successful):
        """Compute average metrics over the selected successful attacks in ``csv_file``.

        Only rows whose ``result_type`` is "Successful" and whose
        bracket-stripped ``original_text`` is contained in ``all_successful``
        are scored.

        Args:
            csv_file: Path to a CSV with ``original_text``, ``perturbed_text``
                and ``result_type`` columns.
            all_successful: Container of bracket-stripped original texts to
                include (membership is tested with ``in``).

        Returns:
            Tuple ``(avg_similarity, avg_relative_perplexity_diff,
            avg_percent_words_changed)``. If no row qualifies, all three
            values are NaN rather than raising ``ZeroDivisionError``.
        """
        df = pd.read_csv(csv_file)
        df = df[df['result_type'] == "Successful"]

        total_sim = 0
        total_pp_diff = 0
        word_changed_percent = 0
        N = 0
        for _, row in df.iterrows():
            original_text = self._strip_brackets(row["original_text"])
            if original_text not in all_successful:
                continue
            perturbed_text = self._strip_brackets(row["perturbed_text"])
            total_sim += self.use_metric.calc_metric(original_text, perturbed_text)
            total_pp_diff += self.gpt2_metric.calc_metric(original_text, perturbed_text)
            word_changed_percent += self.percentageOfWordsChanged.calc_metric(
                original_text, perturbed_text
            )
            N += 1

        if N == 0:
            # Nothing to average: report "no data" instead of crashing so a
            # sweep over many result files can continue.
            nan = float("nan")
            return nan, nan, nan

        return total_sim / N, total_pp_diff / N, word_changed_percent / N

In [4]:
# Build the evaluator once and reuse it below: constructing it instantiates
# USEMetric and GPT2Metric, which load their models.
evaluator = Evaluator()

INFO:absl:Using /tmp/tfhub_modules to cache modules.
INFO:absl:Downloading TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.
INFO:absl:Downloaded https://tfhub.dev/google/universal-sentence-encoder/4, Total size: 987.47MB
INFO:absl:Downloaded TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
# Models whose results are processed. Each entry is a directory name under
# RESULT_ROOT_DIR. To process a single model, narrow the list,
# e.g. to ["bert-yelp-test"].
models = [
    "lstm-yelp-test",
    "lstm-mr-test",
]

# Human-readable label for each model directory name.
model_dataset_names = {
    "bert-mr-test": "BERT Movie Reviews",
    "bert-yelp-test": "BERT Yelp Polarity",
    "bert-snli-test": "BERT SNLI",
    "lstm-mr-test": "LSTM Movie Reviews",
    "lstm-yelp-test": "LSTM Yelp Polarity",
}

transformations = [
    "word-swap-embedding",
    "word-swap-hownet",
    "word-swap-wordnet",
]

constraint_levels = [
    "strict",
]

# Search methods to process; each maps to "<name>.csv" in the result tree.
# To process a single method, narrow the list, e.g. to ["pso"].
search_methods = [
    "greedy",
    "beam4",
    "beam8",
    "greedyWIR_unk",
    "greedyWIR_delete",
    "greedyWIR_pwws",
    "greedyWIR_random",
    "genetic",
    "pso",
]

# Human-readable label for each search method.
search_method_names = {
    "greedy": "Greedy [b=1]",
    "beam4": "Beam Search [b=4]",
    "beam8": "Beam Search [b=8]",
    "greedyWIR_unk": "Greedy WIR [UNK]",
    "greedyWIR_delete": "Greedy WIR [DEL]",
    "greedyWIR_random": "Greedy WIR [RAND]",
    "greedyWIR_pwws": "Greedy WIR [PWWS]",
    "genetic": "Genetic Algorithm",
    "pso": "Particle Swarm Optimization",
}

# Root of the result tree: <root>/<model>/<transformation>/<constraint>/<search>.csv
RESULT_ROOT_DIR = "./results"


In [18]:
# For every (model, transformation, constraint level) group, collect the
# original texts that were attacked successfully by *every* search method.
# One set is appended per group, in loop order.
all_successful_attacks = []
num_files = len(models) * len(transformations) * len(constraint_levels) * len(search_methods)
pbar = tqdm.tqdm(total=num_files, smoothing=0)
for model in models:
    for t in transformations:
        for cl in constraint_levels:
            successful_per_method = []
            for sm in search_methods:
                csv_path = f"{RESULT_ROOT_DIR}/{model}/{t}/{cl}/{sm}.csv"
                df = pd.read_csv(csv_path)
                # Select and clean in one vectorized chain: this avoids
                # assigning onto a filtered slice and also works when no
                # attack in the file succeeded.
                successful_texts = (
                    df.loc[df["result_type"] == "Successful", "original_text"]
                    .str.replace("[", "", regex=False)
                    .str.replace("]", "", regex=False)
                )
                successful_per_method.append(set(successful_texts))
                pbar.update(1)
            # Intersect all per-method sets at once. Using "the running set is
            # empty" as a first-iteration marker would wrongly restart the
            # intersection whenever it legitimately became empty.
            if successful_per_method:
                all_successful = set.intersection(*successful_per_method)
            else:
                all_successful = set()
            all_successful_attacks.append(all_successful)
pbar.close()


HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))

In [19]:
# Evaluate every result file against the texts that all search methods in its
# group attacked successfully, and print one summary line per search method.
num_files = len(models) * len(transformations) * len(constraint_levels) * len(search_methods)
pbar = tqdm.tqdm(total=num_files, smoothing=0)
heavy_rule = "=" * 45
light_rule = "-" * 45
# Same iteration order as the nested model / transformation / constraint loops,
# so index i lines up with the entries of all_successful_attacks.
groups = [
    (model, t, cl)
    for model in models
    for t in transformations
    for cl in constraint_levels
]
for i, (model, t, cl) in enumerate(groups):
    print(heavy_rule)
    print(f"{model}/{t}/{cl}")
    print(light_rule)
    for sm in search_methods:
        csv_path = f"{RESULT_ROOT_DIR}/{model}/{t}/{cl}/{sm}.csv"
        all_successful = all_successful_attacks[i]
        avg_sim, avg_pp_diff, words_changed_percent = evaluator.evaluate(csv_path, all_successful)
        changed_txt = round(words_changed_percent, 2)
        sim_txt = round(avg_sim, 3)
        pp_txt = str(round(avg_pp_diff * 100, 1))
        print(f"{sm}: word_changed_percent={changed_txt}, sim={sim_txt}, pp_diff={pp_txt}%")
        pbar.update(1)
    print(heavy_rule)

HBox(children=(FloatProgress(value=0.0, max=54.0), HTML(value='')))

lstm-yelp-test/word-swap-embedding/strict
---------------------------------------------
greedy: word_changed_percent=4.09, sim=0.942, pp_diff=29.6%
beam4: word_changed_percent=4.06, sim=0.942, pp_diff=29.6%
beam8: word_changed_percent=4.06, sim=0.942, pp_diff=29.4%
greedyWIR_unk: word_changed_percent=5.95, sim=0.932, pp_diff=44.0%
greedyWIR_delete: word_changed_percent=5.93, sim=0.932, pp_diff=42.3%
greedyWIR_pwws: word_changed_percent=4.66, sim=0.939, pp_diff=34.0%
greedyWIR_random: word_changed_percent=7.34, sim=0.924, pp_diff=54.7%
genetic: word_changed_percent=6.01, sim=0.932, pp_diff=44.0%
pso: word_changed_percent=6.75, sim=0.928, pp_diff=48.4%
lstm-yelp-test/word-swap-hownet/strict
---------------------------------------------
greedy: word_changed_percent=2.5, sim=0.948, pp_diff=23.9%
beam4: word_changed_percent=2.5, sim=0.948, pp_diff=23.7%
beam8: word_changed_percent=2.48, sim=0.949, pp_diff=23.0%
greedyWIR_unk: word_changed_percent=3.58, sim=0.934, pp_diff=34.9%
greedyWIR_del