In [2]:
import pickle
import re
import os
import unicodedata
import torch
import numpy as np
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import pickle
from utils.wordProcess import *
from models.moiveRnn import Model
from settings import *
from adversary.fgsm import Attack_MOVIE
from utils.roc_plot import roc_auc
from adversary.jsma import jsma

In [3]:
# Setup cell: load the vocabulary, build the model, restore the adversarially
# trained checkpoint, read the labelled reviews and create the attack helper.

# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load a dictionary file that was produced by this project.
with open('output/dict.pkl', 'rb') as dict_file:
    word_dict = pickle.load(dict_file)
# Inverse mapping (value -> key); used later to turn index sequences back into words.
word_dict_reverse = {v: k for k, v in word_dict.items()}
word_length = len(word_dict)

# Model / data hyper-parameters.
vocabLimit = 50000
max_sequence_len = 500
embedding_dim = 50
hidden_dim = 100

model = Model(embedding_dim, hidden_dim, vocabLimit).to(device)
criterion_none = nn.CrossEntropyLoss(reduction='none')  # per-sample losses
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# map_location keeps the load working when the checkpoint was saved on a
# different device than the one this notebook runs on (e.g. GPU -> CPU).
checkpoint = torch.load(MOIVE_CKPT_ADV_TRAINING, map_location=device)
model.load_state_dict(checkpoint['net'])
best_acc = 0

# Read all rows up front and close the file deterministically; `f` stays a
# list of raw lines (the first line is skipped by the loop that consumes it).
with open('data/labeledTrainData.tsv') as tsv_file:
    f = tsv_file.readlines()

bim_attack = Attack_MOVIE(model, F.cross_entropy)

In [4]:
def FGSM(x, y_true, eps=0.01):
    """Run the module-level ``bim_attack.fgsm`` on ``x`` with labels ``y_true``.

    Args:
        x: input tensor; moved to ``device`` and wrapped with
            ``requires_grad=True`` before being handed to the attack.
        y_true: label tensor; moved to ``device`` with ``requires_grad=False``.
        eps: forwarded as the last positional argument of ``bim_attack.fgsm``
            (presumably the perturbation size -- confirm in adversary/fgsm).

    Returns:
        Whatever ``bim_attack.fgsm`` returns for these arguments.
    """
    grad_input = Variable(x.to(device), requires_grad=True)
    labels = Variable(y_true.to(device), requires_grad=False)
    # The third positional argument is always False here.
    # NOTE(review): its meaning is defined by Attack_MOVIE.fgsm -- confirm there.
    return bim_attack.fgsm(grad_input, labels, False, eps)

In [5]:
# For every short review that the model classifies correctly, craft an
# adversarial sentence with jsma, then run FGSM on the embeddings of both the
# original and the adversarial sentence and report the per-sample loss of each.

# Reviews with more words than this are skipped (previously a bare literal 25).
MAX_ATTACK_WORDS = 25

for lines in f[1:]:  # the first line of the TSV is skipped, as before
    fields = lines.split('\t')
    if len(fields) < 3:
        # Blank or malformed row: there is no review text to process.
        continue

    data = normalizeString(fields[2]).strip()
    # NOTE(review): a word missing from word_dict raises KeyError here --
    # confirm the dictionary covers every token of this file.
    input_data = [word_dict[word] for word in data.split(' ')]
    if len(input_data) > MAX_ATTACK_WORDS:
        continue
    # No-op while MAX_ATTACK_WORDS <= max_sequence_len; kept as a plain slice
    # so the length cap still applies if the filter above is relaxed.
    input_data = input_data[:max_sequence_len]
    input_data = torch.tensor(input_data, dtype=torch.long, device=device)
    target = int(fields[1])
    target_data = torch.tensor([target], dtype=torch.long, device=device)

    y_pred, embeddings = model(input_data)
    _, predicted = y_pred.max(1)
    if not predicted.eq(target_data).sum().item():
        # Only attack reviews whose prediction already matches the label.
        continue

    changed, benign_adv, change_words, loss_benign = jsma(
        input_data.clone(), target_data, model, nb_classes=2, max_iter=20)
    if not changed:
        continue

    # Map the adversarial indices back to words for display.
    adv_data_list = [word_dict_reverse[k.item()] for k in benign_adv]
    adv_data = ' '.join(adv_data_list)

    _, input_data_embedding = model(input_data)
    _, benign_adv_embedding = model(benign_adv)
    # The original embedding is attacked with the true label, the adversarial
    # embedding with the flipped label (labels are 0/1, hence 1 - target).
    flipped_target = 1 - target_data
    benign_undercover = FGSM(input_data_embedding, target_data, eps=0.001)
    adv_undercover = FGSM(benign_adv_embedding, flipped_target, eps=0.001)

    benign_outputs, _ = model(benign_undercover, after_embedding=True)
    temp1 = criterion_none(benign_outputs, target_data)[0].item()
    adv_outputs, _ = model(adv_undercover, after_embedding=True)
    temp2 = criterion_none(adv_outputs, flipped_target)[0].item()

    print('-'*20)
    print('original sentiment: ', target)
    print('original sentence: ',data)
    print('adv sentence:      ',adv_data)
    print(('original criterion loss: %.2f -- adversarial criterion loss: %.2f') % (temp1, temp2))

--------------------
original sentiment:  0
original sentence:  the characters are unlikeable and the script is awful . it s a waste of the talents of deneuve and auteuil .
adv sentence:       the characters are if some who who is and . it who a waste of but about that the and he the
original criterion loss: -0.00 -- adversarial criterion loss: 0.26
--------------------
original sentiment:  1
original sentence:  adrian pasdar is excellent is this film . he makes a fascinating woman .
adv sentence:       the pasdar is excellent is just just there in makes and fascinating just .
original criterion loss: -0.00 -- adversarial criterion loss: 0.59
--------------------
original sentiment:  1
original sentence:  this movie will always be a broadway and movie classic as long as there are still people who sing dance and act .
adv sentence:       this movie will always some so be and movie classic as long as there are still people who sing dance and act .
original criterion loss: 0.00 -- adversa

The decision threshold (split point) on the criterion loss is 0.117575884:
normal sentence --> criterion loss <= 0.117575884
adversarial sentence --> criterion loss > 0.117575884