In [1]:
import pandas as pd
from tqdm import notebook as tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
import tensorflow as tf
import tensorflow_hub as hub
import torch
import math
import numpy as np

In [2]:
# GPU indices used by the two frameworks in this notebook.
PYTORCH_DEVICE = 0
TF_DEVICE = 1
# Derive PyTorch's default CUDA device from the constant above so the two can
# never drift apart when PYTORCH_DEVICE is changed.
torch.cuda.set_device(PYTORCH_DEVICE)

In [3]:
class GPT2Metric:
    """Perplexity-based metric computed with the pretrained "gpt2" checkpoint."""

    def __init__(self):
        """Load the "gpt2" causal LM onto the PyTorch GPU and its fast tokenizer."""
        gpu = f'cuda:{PYTORCH_DEVICE}'
        self._model = AutoModelForCausalLM.from_pretrained("gpt2")
        self._model.to(device=gpu)
        self._tokenizer = AutoTokenizer.from_pretrained("gpt2", use_fast=True)

    def perplexity(self, text):
        """Return exp(loss) of the model on ``text`` wrapped in BOS/EOS tokens.

        Args:
            text: String to score.

        Returns:
            float: ``math.exp`` of the loss the model reports when the token
            ids are passed both as input and as labels.
        """
        tok = self._tokenizer
        # Truncate so that the two marker tokens added below still fit within
        # the tokenizer's maximum length.
        body_ids = tok.encode(text)[: tok.model_max_length - 2]
        token_ids = [tok.bos_token_id, *body_ids, tok.eos_token_id]
        batch = torch.tensor(token_ids).to(device=f'cuda:{PYTORCH_DEVICE}')
        with torch.no_grad():
            outputs = self._model(batch, labels=batch)
        # The first element of the model output is the loss.
        return math.exp(outputs[0].item())

    def calc_metric(self, orig_text, new_text):
        """Return the relative perplexity change from ``orig_text`` to ``new_text``.

        Computed as ``(new - orig) / orig``; positive values mean the new text
        has a higher perplexity than the original.
        """
        baseline = self.perplexity(orig_text)
        candidate = self.perplexity(new_text)
        return (candidate - baseline) / baseline
    

class USEMetric:
    """Semantic-similarity metric built on the Universal Sentence Encoder (v4)."""

    def __init__(self):
        """Load the encoder from TF-Hub inside the TensorFlow GPU device scope."""
        tfhub_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
        with tf.device(f'/device:GPU:{TF_DEVICE}'):
            self._model = hub.load(tfhub_url)

    def encode(self, orig_text, new_text):
        """Embed both texts in one call.

        Returns:
            The model output for ``[orig_text, new_text]`` converted with
            ``.numpy()``; ``calc_metric`` unpacks it into two embeddings.
        """
        with tf.device(f'/device:GPU:{TF_DEVICE}'):
            return self._model([orig_text, new_text]).numpy()

    def get_angular_sim(self, emb1, emb2):
        """Return the angular similarity ``1 - acos(cos_sim) / pi`` of two tensors.

        Args:
            emb1: Torch tensor; cosine similarity is taken along dim 0.
            emb2: Torch tensor shaped like ``emb1``.

        Returns:
            torch.Tensor: 1 for identical directions, 0.5 for orthogonal
            vectors, 0 for opposite directions.
        """
        cos_sim = torch.nn.CosineSimilarity(dim=0)(emb1, emb2)
        # Floating-point rounding can push the cosine marginally outside
        # [-1, 1] for (near-)identical vectors, which would make acos return
        # NaN; clamp to the valid domain first.
        cos_sim = torch.clamp(cos_sim, -1.0, 1.0)
        return 1 - (torch.acos(cos_sim) / math.pi)

    def calc_metric(self, orig_text, new_text):
        """Return the angular similarity between the two texts as a float."""
        orig_emb, new_emb = self.encode(orig_text, new_text)
        orig_emb = torch.tensor(orig_emb)
        new_emb = torch.tensor(new_emb)
        sim = self.get_angular_sim(orig_emb, new_emb).item()
        return sim

class PercentageOfWordsChanged:
    """Percentage of whitespace-separated words that differ between two texts."""

    def calc_metric(self, orig_text, new_text):
        """Return the share of changed words, as a percentage of the original.

        Words are compared position by position. If the texts have different
        word counts, every missing or extra word counts as one change instead
        of relying on an element-wise comparison of mismatched arrays.

        Args:
            orig_text: Original text.
            new_text: Perturbed text.

        Returns:
            float: ``changed_words * 100 / len(original_words)``; ``0.0`` when
            the original text contains no words (the ratio is undefined).
        """
        orig_words = orig_text.split()
        new_words = new_text.split()
        if not orig_words:
            # Avoid dividing by zero on an empty original.
            return 0.0
        # zip() stops at the shorter text; the length difference is added below.
        words_changed = sum(o != n for o, n in zip(orig_words, new_words))
        words_changed += abs(len(orig_words) - len(new_words))
        return words_changed * 100 / len(orig_words)
    
class Evaluator:
    """Averages the three quality metrics over the successful attacks in a CSV."""

    def __init__(self):
        """Instantiate the USE, GPT-2 and words-changed metrics."""
        self.use_metric = USEMetric()
        self.gpt2_metric = GPT2Metric()
        self.percentageOfWordsChanged = PercentageOfWordsChanged()

    @staticmethod
    def _strip_brackets(text):
        """Remove every '[' and ']' character from ``text``."""
        return text.replace("[", "").replace("]", "")

    def evaluate(self, csv_file, all_successful):
        """Compute average metrics over the rows shared with ``all_successful``.

        Only rows whose ``result_type`` is ``"Successful"`` and whose
        bracket-stripped ``original_text`` is contained in ``all_successful``
        contribute to the averages.

        Args:
            csv_file: Path to a results CSV with ``result_type``,
                ``original_text`` and ``perturbed_text`` columns.
            all_successful: Container of bracket-stripped original texts to keep.

        Returns:
            tuple: ``(avg_similarity, avg_perplexity_diff, avg_words_changed_percent)``.
            All three are NaN when no row qualifies, instead of raising
            ``ZeroDivisionError``.
        """
        df = pd.read_csv(csv_file)
        df = df[df['result_type'] == "Successful"]

        total_sim = 0
        total_pp_diff = 0
        word_changed_percent = 0
        N = 0
        for _, row in df.iterrows():
            original_text = self._strip_brackets(row["original_text"])
            if original_text not in all_successful:
                continue
            perturbed_text = self._strip_brackets(row["perturbed_text"])
            total_sim += self.use_metric.calc_metric(original_text, perturbed_text)
            total_pp_diff += self.gpt2_metric.calc_metric(original_text, perturbed_text)
            word_changed_percent += self.percentageOfWordsChanged.calc_metric(
                original_text, perturbed_text
            )
            N += 1

        if N == 0:
            # Nothing to average over: report "no data" rather than crashing.
            nan = float("nan")
            return nan, nan, nan

        return total_sim / N, total_pp_diff / N, word_changed_percent / N

In [4]:
# Build the shared evaluator once; constructing it instantiates the USE and
# GPT-2 metric objects, so later cells reuse this instance.
evaluator = Evaluator()

INFO:absl:Using /tmp/tfhub_modules to cache modules.
INFO:absl:Downloading TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.
INFO:absl:Downloaded https://tfhub.dev/google/universal-sentence-encoder/4, Total size: 987.47MB
INFO:absl:Downloaded TF-Hub Module 'https://tfhub.dev/google/universal-sentence-encoder/4'.


In [20]:
# Experiment grid: results are read from
# RESULT_ROOT_DIR/<model>/<transformation>/<constraint level>/<search method>.csv
models = ["bert-yelp-test", "bert-mr-test", "bert-snli-test"]
#models = ["lstm-yelp-test", "lstm-mr-test"]
model_dataset_names = {
    "bert-mr-test": "BERT Movie Reviews",
    "bert-yelp-test": "BERT Yelp Polarity",
    "bert-snli-test": "BERT SNLI",
    "lstm-mr-test": "LSTM Movie Reviews",
    "lstm-yelp-test": "LSTM Yelp Polarity",
}
transformations = ["word-swap-embedding", "word-swap-hownet", "word-swap-wordnet"]
constraint_levels = ["strict"]
search_methods = ["greedy", "beam4", "beam8", "greedyWIR_unk", "greedyWIR_delete", "greedyWIR_pwws", "greedyWIR_gradient",  "greedyWIR_random", "genetic", "pso"]
# Display names, one entry per search method. The gradient entry previously
# reused the 'greedyWIR_random' key, which overwrote the RAND label and left
# 'greedyWIR_gradient' without a name.
search_method_names = {
    'greedy': 'Greedy [b=1]',
    'beam4': 'Beam Search [b=4]',
    'beam8': 'Beam Search [b=8]',
    'greedyWIR_unk': 'Greedy WIR [UNK]',
    'greedyWIR_delete': 'Greedy WIR [DEL]',
    'greedyWIR_random': 'Greedy WIR [RAND]',
    'greedyWIR_gradient': 'Greedy WIR [Gradient]',
    'greedyWIR_pwws': 'Greedy WIR [PWWS]',
    'genetic': 'Genetic Algorithm',
    'pso': 'Particle Swarm Optimization'
}
RESULT_ROOT_DIR = "./results"


In [25]:
# For every (model, transformation, constraint level) combination, collect the
# bracket-stripped original texts that were attacked successfully by every
# search method. One set is appended per combination, in loop order.
all_successful_attacks = []
num_files = len(models) * len(transformations) * len(constraint_levels) * len(search_methods)
pbar = tqdm.tqdm(total=num_files, smoothing=0)
for model in models:
    for t in transformations:
        for cl in constraint_levels:
            # None means "no file processed yet". An empty set is a legitimate
            # intersection result and must not be confused with the initial
            # state, otherwise the intersection would silently restart.
            all_successful = None
            for sm in search_methods:
                csv_path = f"{RESULT_ROOT_DIR}/{model}/{t}/{cl}/{sm}.csv"
                df = pd.read_csv(csv_path)
                successful_texts = set(
                    df.loc[df['result_type'] == "Successful", "original_text"]
                    .str.replace("[", "", regex=False)
                    .str.replace("]", "", regex=False)
                )
                if all_successful is None:
                    all_successful = successful_texts
                else:
                    all_successful &= successful_texts
                pbar.update(1)
            # With no search methods configured there is nothing in common.
            all_successful_attacks.append(all_successful if all_successful is not None else set())
pbar.close()


HBox(children=(FloatProgress(value=0.0, max=90.0), HTML(value='')))

In [26]:
# LaTeX row prefixes for the results table, one per search method and in the
# same order as the search-method list used when printing the rows.
HEADERS = [
    r"& \multirow{7}{*}{MR} & Greedy (b=1) &",
    r"& & Beam Search (b=4) &",
    r"& & Beam Search (b=8) &",
    # The word-importance-ranking variants differ only in their \texttt tag.
    *(
        rf"& & \importanceRankingNameAbbrev (\texttt{{{tag}}}) &"
        for tag in ("UNK", "DEL", "PWWS", "Gradient", "RAND")
    ),
    r"& & Genetic Algorithm &",
    r"& & PSO &",
]

In [27]:
# Evaluate every results file and print, per model, one LaTeX table row per
# search method with the metrics of all transformations side by side.
num_files = len(models) * len(transformations) * len(constraint_levels) * len(search_methods)
pbar = tqdm.tqdm(total=num_files, smoothing=0)
combo_idx = 0  # position of the current combination in all_successful_attacks
for model in models:
    # result[k] accumulates the table cells of the k-th search method.
    result = [[] for _ in search_methods]
    for t in transformations:
        for cl in constraint_levels:
            print("=" * 45)
            print(f"{model}/{t}/{cl}")
            print("-" * 45)
            for k, sm in enumerate(search_methods):
                csv_path = f"{RESULT_ROOT_DIR}/{model}/{t}/{cl}/{sm}.csv"
                all_successful = all_successful_attacks[combo_idx]
                avg_sim, avg_pp_diff, words_changed_percent = evaluator.evaluate(csv_path, all_successful)
                cells = (
                    f"{round(words_changed_percent, 2)}",
                    f"{round(avg_sim, 3)}",
                    str(round(avg_pp_diff * 100, 1)),
                )
                result[k].append(" & ".join(cells))
                pbar.update(1)
            combo_idx += 1
    for j, cells in enumerate(result):
        latex_row = HEADERS[j] + " & ".join(cells) + "\\\\"
        print(latex_row)

HBox(children=(FloatProgress(value=0.0, max=90.0), HTML(value='')))

bert-yelp-test/word-swap-embedding/strict
---------------------------------------------
bert-yelp-test/word-swap-hownet/strict
---------------------------------------------
bert-yelp-test/word-swap-wordnet/strict
---------------------------------------------
& \multirow{7}{*}{MR} & Greedy (b=1) &3.41 & 0.948 & 21.5 & 2.52 & 0.945 & 22.8 & 4.76 & 0.943 & 49.9\\
& & Beam Search (b=4) &3.26 & 0.949 & 20.7 & 2.45 & 0.946 & 22.0 & 4.49 & 0.944 & 46.7\\
& & Beam Search (b=8) &3.2 & 0.95 & 20.1 & 2.42 & 0.947 & 21.4 & 4.46 & 0.945 & 46.4\\
& & \importanceRankingNameAbbrev (\texttt{UNK}) &6.48 & 0.93 & 43.5 & 4.73 & 0.922 & 42.3 & 9.02 & 0.924 & 92.1\\
& & \importanceRankingNameAbbrev (\texttt{DEL}) &6.85 & 0.928 & 47.2 & 5.1 & 0.919 & 46.4 & 9.38 & 0.923 & 98.8\\
& & \importanceRankingNameAbbrev (\texttt{PWWS}) &4.36 & 0.942 & 27.3 & 3.11 & 0.94 & 28.1 & 6.1 & 0.937 & 66.1\\
& & \importanceRankingNameAbbrev (\texttt{Gradient}) &6.16 & 0.933 & 37.8 & 5.58 & 0.913 & 44.5 & 9.1 & 0.925 & 86.4\\
