# Script for crafting Adversarial Examples

In [2]:
import os
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix

import utils.text_processing as tp

ModuleNotFoundError: No module named 'plotly'

In [2]:
# get data from important word detector
%store -r important_words_packages
%store -r sentence_packages
%store -r loo_results

%store -r important_words_packages_dev
%store -r sentence_packages_dev
%store -r loo_results_dev

In [3]:
#important_words_packages = important_words_packages_dev
#sentence_packages = sentence_packages_dev
#loo_results = loo_results_dev

## Methed 2: Mispeeling

#### 1. Create Typo-Dict

In [4]:
with open(os.path.join('data','common_typos_wikipedia.txt'), 'r') as f:
    typods = f.readlines()

typodict = {}
      
for typod in typods:  
    typo = typod.split("->")[0].strip('\n')
    word = typod.split("->")[1].strip('\n')
 
    for item in word.split(", "):
        if item in typodict:
            tmp_typo = typodict[item]
            tmp_typo.append(typo)             
            tmp_typo = list(set(tmp_typo)) #filter duplicates
            typodict[item] = tmp_typo
        else:
            typodict.update({item : [typo]})
      

### Create Datasets

In [5]:
# get original sentences from sentence_packages
original_sentences = []
for package in sentence_packages:
    original_sentences.append(package['original_sentence'])

In [6]:
# generate modified words: take Important Words and conduct modification

modified_words_packages = []

for important_words in important_words_packages:
    modified_words = []
    
    for word in important_words:
        modified_word_variances = []
        modified_word_variances.append(tp.to_typo(typodict, word))
        modified_words.append(modified_word_variances)

    modified_words_packages.append(modified_words)

In [7]:
# generate modified sentences
modifyable_original_sentences, modified_sentence_packages, number_of_modified_sentences = tp.generate_modified_sentence_packages(original_sentences, important_words_packages, modified_words_packages)

In [8]:
print('Total number of original sentences:', len(original_sentences))
print('Total number of modifyable original sentences: ', len(modifyable_original_sentences))
print('Total number of modified sentences: ', number_of_modified_sentences)

Total number of original sentences: 943
Total number of modifyable original sentences:  369
Total number of modified sentences:  1354


#### 2. Import BERT Model

In [None]:
k = open("security/key.txt", "r")
key = k.read()

In [9]:
import torch
import random
import spacy
import numpy as np
import transformers
import pandas as pd

from absa import Predictor
from security import Authorization

nlp = spacy.load('en_core_web_sm')
pred = Predictor(os.path.join('models','en-laptops-absa'))

!export CUDA_VISIBLE_DEVICE=0

Initializing Predictor
Loading model models/en-laptops-absa
Config loaded from models/en-laptops-absa/config.json
Aspects loaded from models/en-laptops-absa/aspects.jsonl
Config loaded from models/en-laptops-absa/config.json


#### 3. Prediction

###### Original Prediction

In [10]:
documents = []
for sentence in modifyable_original_sentences:
    document = {'text': sentence, 'segments':[{'span':[0,0],'text': sentence}]}
    documents.append(document)
    
results = pred.predict(documents, key, with_segments=True)

original_predictions = []
for result in results:
    original_predictions.append(result[0])
#print(original_predictions)
print(len(original_predictions))

Running authorization for token for functionality Analysis/Aspect-Sentiments and language None
DEBUG:security.authorization:Running authorization for token for functionality Analysis/Aspect-Sentiments and language None


369


#### Modified Prediction

In [11]:
# 1. create indices for prediction,
# 2. flatten modified_sentence_packages and 
# 3. predict flattened list

results = []
documents = []
package_indices = []
package_index = 0
for sentence in modified_sentence_packages:
    package_index += 1
    for word in sentence:
        for variant in word:  
            package_indices.append(package_index)
            document = {'text': variant, 'segments':[{'span':[0,0],'text': variant}]}
            documents.append(document)
    
results = pred.predict(documents, key, with_segments=True)

modified_results_flattened = []
for result in results:
    modified_results_flattened.append(result[0])

#print(modified_results_flattened)
print(len(modified_results_flattened))

Running authorization for token for functionality Analysis/Aspect-Sentiments and language None
DEBUG:security.authorization:Running authorization for token for functionality Analysis/Aspect-Sentiments and language None


1354


In [12]:
# 4. map to lvl2 sentence packages
# IMPORTANT: here I loose the last variants level because I do not actually need it
# todo: loose levels --> only have one level per sentence when generating modified sentence packages

modified_predictions = []
modified_sentence = []
check = 1
for e, result in enumerate(modified_results_flattened):
    i = package_indices[e]    
    if i == check:
        modified_sentence.append(result)
    else:
        modified_predictions.append(modified_sentence)
        modified_sentence = []
        modified_sentence.append(result)
    check = i
modified_predictions.append(modified_sentence)
#print(modified_predictions)

## 4. Comparison of results to check effectiveness of attack

In [13]:
typo_results = tp.compare_results(original_predictions, modified_predictions)

### 5. Create adversarial Dataset

In [14]:
original_texts, original_results, modified_texts, modified_results, successfull_modifications = tp.generate_results_lists(typo_results)

In [15]:
adversarial_dataset = pd.DataFrame(list(zip(original_texts, original_results, modified_texts, modified_results)),
                 columns = ['original_sentence', 'original_prediction', 'modified_sentence', 'modified_prediction'])

In [16]:
adversarial_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259 entries, 0 to 258
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   original_sentence    259 non-null    object
 1   original_prediction  259 non-null    object
 2   modified_sentence    259 non-null    object
 3   modified_prediction  259 non-null    object
dtypes: object(4)
memory usage: 8.2+ KB


In [17]:
adversarial_dataset.head()

Unnamed: 0,original_sentence,original_prediction,modified_sentence,modified_prediction
0,Oh my goodness-I am not a happy camper.,"[{'aspect': 'Laptop (general)', 'sentiment': '...","[Oh my goodness-I am nto a happy camper., Oh m...","[[{'aspect': 'Laptop (general)', 'sentiment': ..."
1,I returned the computer to HP and they kept it...,[],[I returned the computer to HP and htey kept i...,"[[{'aspect': 'Laptop (general)', 'sentiment': ..."
2,When I got the computer back and realizwed it ...,"[{'aspect': 'Laptop (general)', 'sentiment': '...",[When I got the cmoputer back and realizwed it...,"[[], [], [], []]"
3,Currently if I use the laptop I can't sit it o...,[],[Currently if I uise the laptop I can't sit it...,"[[{'aspect': 'Temperature', 'sentiment': 'NEG'..."
4,"The screen takes some getting use to, because ...","[{'aspect': 'Screen', 'sentiment': 'NEG'}, {'a...","[The screen takes some getting use to, because...","[[{'aspect': 'Screen', 'sentiment': 'NEG'}], [..."


In [18]:
%store adversarial_dataset

Stored 'adversarial_dataset' (DataFrame)


In [19]:
adversarial_dataset.to_json(r'data/adversarial_dataset_multitypos.json', orient='records')

#### 6. Resluts

In [20]:
results_table_typo = tp.generate_results_df(original_sentences, modifyable_original_sentences, number_of_modified_sentences, successfull_modifications, pmethod='typos')

In [21]:
%store results_table_typo
results_table_typo

Stored 'results_table_typo' (DataFrame)


Unnamed: 0,0
Perturbation Method,typos
Tokenizer,en_core_web_sm
Model,en-laptops-absa
Dataset,SemEval 2015 Laptops
Total number of original sentences,943
Total number of modifyable original sentences,369
Total number of modified sentences,1354
Total number of changed predictions through modification,755
Success Rate,0.557607


In [22]:
extended_original_results, extended_modified_results = tp.generate_multipredictions(original_results, modified_results)

# asp_map = [('Graphics', 'POS'), ....]
# for eor in extended_results:
# tuple = (kv['aspect'], kv['sentiment']) => asp_map.index(tuple)


In [23]:
def generate_aspsent_map_dict(results_dict):
    # check, which aspects and sentiments are in the data

    aspects_sentiments = []

    for item in results_dict:
        print(item)
        tpl = (result['aspect'],result['sentiment'])
        print(tpl)
        if tpl not in aspects_sentiments:
            aspects_sentiments.append(tpl)

    return aspects_sentiments


In [24]:
aspsntmap = generate_aspsent_map_dict(loo_results)
#aspsntmap

{'original_sentence': 'super fast processor and really nice graphics card..', 'original_result': [{'aspect': 'Graphics', 'sentiment': 'POS'}, {'aspect': 'CPU', 'sentiment': 'POS'}, {'aspect': 'Performance', 'sentiment': 'POS'}], 'modified_sentences': [['super processor and really nice graphics card..', 'super fast and really nice graphics card..']], 'modified_results': [[[{'aspect': 'Graphics', 'sentiment': 'POS'}, {'aspect': 'CPU', 'sentiment': 'POS'}], [{'aspect': 'Graphics', 'sentiment': 'POS'}, {'aspect': 'Performance', 'sentiment': 'POS'}]]]}


KeyError: 'aspect'

In [None]:
aspects_sentiments_map_modified = []
for emr in extended_modified_results:
    for result in emr:
        print(result)
        result_tuple = (result['aspect'], result['sentiment'])
        idx = asp_sent_map.index(result_tuple)
        print(idx)
        #aspects_sentiments_map_modified.append(asp_sent_map.index(result_tuple))
        
        
print(aspects_sentiments_map_modified)        

In [None]:
aspects_sentiments_original = []
for item in extended_original_results:
    for result in item:
        aspect_sentiment = []
        aspect_sentiment.append(result.get('aspect'))
        aspect_sentiment.append(result.get('sentiment'))
        aspects_sentiments_original.append(aspect_sentiment)
        
print(aspects_sentiments_original)        

In [1]:


fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = ["A1", "A2", "B1", "B2", "C1", "C2"],
      color = "brown"
    ),
    link = dict(
      source = [0, 1, 0, 2, 3, 3], # indices correspond to labels, eg A1, A2, A2, B1, ...
      target = [2, 3, 3, 4, 4, 5],
      value = [8, 4, 2, 8, 4, 2]
  ))])

fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
fig.show()


NameError: name 'go' is not defined