# Script for crafting Adversarial Examples

## Perturbation Methods
#### 1. leet speak & unicode
#### 2. typos
#### 3. punctuation marks

In [17]:
import os
import time

import utils.text_processing as tp

In [18]:
# Restore the variables previously persisted with `%store` by the important-word
# detector; neither name is defined anywhere else in this notebook.
# NOTE(review): presumably the detector notebook must have been run first — confirm.
%store -r important_words
%store -r sentence_packages

## M3th0d 1: 133t 5p34k

#### 1. Create modified Dataset

In [19]:
# Pull the unmodified text out of every sentence package.
original_sentences = [package['original_sentence'] for package in sentence_packages]

In [20]:
# Leet-speak counterpart for each important word, kept in the same order.
modified_words = [tp.to_leet(important_word) for important_word in important_words]

In [21]:
# Build the perturbed sentences from the originals, the important words and
# their leet-speak replacements.
modified_sentences = tp.generate_modified_sentences(
    original_sentences,
    important_words,
    modified_words,
)

#### 2. Import BERT Model

In [22]:
import torch
import random
import numpy as np
import pandas as pd

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the tokenizer and the sequence classifier from one and the same checkpoint.
checkpoint = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
model.eval();  # evaluation mode; the trailing semicolon hides the model repr in the cell output

#### 3. Prediction

In [23]:
# Predicted sentiment for every unmodified sentence.
original_predictions = [
    tp.predict_sentiment(model, tokenizer, text) for text in original_sentences
]

In [24]:
# Predicted sentiment for every perturbed sentence.
modified_predictions = [
    tp.predict_sentiment(model, tokenizer, text) for text in modified_sentences
]

#### 4. Creation of Dataset

In [25]:
# Assemble one row per sentence: original text, its prediction, the perturbed
# text and its prediction.
# The column names are plural on purpose: the adversarial filter later in this
# notebook reads `ds.original_predictions` / `ds.modified_predictions`, so
# singular names would raise AttributeError on a fresh Restart & Run All.
comparison_columns = [
    'original_sentences',
    'original_predictions',
    'modified_sentences',
    'modified_predictions',
]
ds = pd.DataFrame(
    list(zip(original_sentences, original_predictions, modified_sentences, modified_predictions)),
    columns=comparison_columns,
)

In [26]:
# Preview the first rows of the original-vs-modified comparison table.
ds.head()

Unnamed: 0,original_sentences,original_predictions,modified_sentences,modified_predictions
0,I visited this b&b during a short trip to ride...,4,I visited this b&b during a short trip to ride...,4
1,"The owners were really lovely people, the room...",4,"The owners were really lovely people, the room...",4
2,It is in a really good location for riding or ...,3,It is in a really good location for riding or ...,4
3,I cannot recommend this place enough!.,0,I c4nn0t recommend this place enough!.,4
4,3 friends and I visited 't Materke in April 20...,4,3 friends and I visited 't Materke in April 20...,4


In [27]:
# Summarise the comparison table: row count, column dtypes and non-null counts.
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 435 entries, 0 to 434
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_sentences    435 non-null    object
 1   original_predictions  435 non-null    int64 
 2   modified_sentences    435 non-null    object
 3   modified_predictions  435 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 13.7+ KB


#### 5. Creation of adversarial Dataset

In [28]:
# Keep only the rows whose predicted label changed after the perturbation.
advds = ds.loc[ds['original_predictions'] != ds['modified_predictions']]
advds.head()

Unnamed: 0,original_sentences,original_predictions,modified_sentences,modified_predictions
2,It is in a really good location for riding or ...,3,It is in a really good location for riding or ...,4
3,I cannot recommend this place enough!.,0,I c4nn0t recommend this place enough!.,4
5,2 of us had stayed before so knew to expect a ...,2,2 of us had stayed before so knew to 3xp3ct a ...,4
6,'t Materke is a fantastic B&B situatied in the...,4,'t Materke is a f4nt45tic B&B situatied in the...,3
7,In a superb location it is ideal to get out in...,4,In a 5up3rb location it is ideal to get out in...,3


183 sentences are misclassified after applying leet speak

In [30]:
# Summarise the adversarial subset; the entry count is the number of sentences
# whose prediction changed.
advds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 183 entries, 2 to 431
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_sentences    183 non-null    object
 1   original_predictions  183 non-null    int64 
 2   modified_sentences    183 non-null    object
 3   modified_predictions  183 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 7.1+ KB


In [31]:
 # Persist the adversarial rows as JSON; orient='records' writes a list with one
 # object per row. The path is relative to the current working directory.
 advds.to_json(r'data/adversarial_dataset_l33t.json', orient='records')