In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
df = pd.read_csv('../data/ag_news_csv/test.csv', header=None)

In [3]:
from tqdm.autonotebook import tqdm

  """Entry point for launching an IPython kernel.


In [4]:
from model import CharCNN
import torch
from torch.autograd import Variable
from metric import print_f_score
import torch.nn.functional as F

In [5]:
import json
with open('../alphabet.json') as f:
    res = json.load(f)
num_features = len(res)

In [6]:
class Args():
    num_features = num_features
    dropout = 0.5
    test_path = '../data/ag_news_csv/test.csv'
    alphabet_path = '../alphabet.json'
    batch_size = 20
    num_workers = 4
args = Args()

In [14]:
char_model = CharCNN(args)
checkpoint = torch.load('models/CharCNN_best.pth.tar')
char_model.load_state_dict(checkpoint['state_dict'])
char_model = char_model.to("cuda:0")

In [15]:
def test_model(test_dataset, model, limit=None):
    corrects, avg_loss, accumulated_loss, size, fooled = 0, 0, 0, 0, 0
    predicates_all, target_all = [], []
    for i_batch, (data) in enumerate(tqdm(test_dataset)):
        inputs_init, inputs, target = data
        target.sub_(1)
        size+=1
        inputs_init = Variable(torch.unsqueeze(inputs_init, 0).to("cuda:0"))
        inputs = Variable(torch.unsqueeze(inputs, 0).to("cuda:0"))
        target = Variable(torch.unsqueeze(target, 0).to("cuda:0"))
        logit_init = model(inputs_init)
        logit = model(inputs)
        fooled += (logit_init.argmax() != logit.argmax()).detach().cpu().numpy()
    
        predicates = torch.max(logit, 1)[1].view(target.size()).data
        accumulated_loss += F.nll_loss(logit, target, size_average=False).data.item()
        corrects += (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum().item()
        predicates_all+=predicates.cpu().numpy().tolist()
        target_all+=target.data.cpu().numpy().tolist()
        if limit is not None and i_batch == limit:
            break

    avg_loss = accumulated_loss/size
    accuracy = 100.0 * corrects/size
    fooled = 100.0 * fooled/size
    print('\rEvaluation - loss: {:.6f}  acc: {:.3f}%({}/{}) fooling rate: {:.3f}'.format(avg_loss, 
                                                                       accuracy, 
                                                                       corrects, 
                                                                       size,
                                                                       fooled))
    print_f_score(predicates_all, target_all)
    return accuracy, predicates_all, target_all

In [16]:
from data_loader_hotflip_greedy import AGNEWs_HotFlip_Greedy

In [17]:
test_dataset = AGNEWs_HotFlip_Greedy(per_corrupt=1, model=char_model, theta=0.5, dpp=False,
                                  label_data_path=args.test_path, alphabet_path=args.alphabet_path)
accuracy, predicates_all, target_all = test_model(test_dataset, char_model, limit=10)

HBox(children=(FloatProgress(value=0.0, max=7600.0), HTML(value='')))

Evaluation - loss: 0.621648  acc: 81.818%(9/11) fooling rate: 18.182
Label: [31m  2[0m	Prec: [32m 50.0[0m% (1/2)        Recall: [32m100.0[0m% (1/1)        F-Score: [32m 66.7[0m%[0m
Label: [31m  3[0m	Prec: [32m100.0[0m% (8/8)        Recall: [32m 80.0[0m% (8/10)       F-Score: [32m 88.9[0m%[0m


In [18]:
test_dataset.print_string(0)

original: fears for t n pension after talks unions representing workers at turner   newall say they are 'disappointed' after talks with stricken parent firm federal mogul.
[31mcorrupt: [0mfears for t n pension after talks unions representing workers at turner   newall say they are 'disappointed' afte[31m=[0m talks with stricken parent firm federal mogul.

In [19]:
from data_loader_hotflip_beam import AGNEWs_HotFlip_Beam

In [20]:
test_dataset_beam = AGNEWs_HotFlip_Beam(per_corrupt=1, model=char_model, theta=0.5, k=1, dpp=False,
                                        label_data_path=args.test_path, alphabet_path=args.alphabet_path)
accuracy, predicates_all, target_all = test_model(test_dataset_beam, char_model, limit=10)

HBox(children=(FloatProgress(value=0.0, max=7600.0), HTML(value='')))

Evaluation - loss: 0.884745  acc: 63.636%(7/11) fooling rate: 36.364
Label: [31m  2[0m	Prec: [32m 25.0[0m% (1/4)        Recall: [32m100.0[0m% (1/1)        F-Score: [32m 40.0[0m%[0m
Label: [31m  3[0m	Prec: [32m100.0[0m% (6/6)        Recall: [32m 60.0[0m% (6/10)       F-Score: [32m 75.0[0m%[0m


In [21]:
test_dataset_beam.print_string(0)

original: fears for t n pension after talks unions representing workers at turner   newall say they are 'disappointed' after talks with stricken parent firm federal mogul.
[31mcorrupt: [0mfears for t n pension after talks unions representing workers at turner   newall say they are 'disappointed' afte[31m=[0m talks with stricken parent firm federal mogul.

In [22]:
from data_loader_deepwordbug import WordBug

In [23]:
test_dataset_beam = WordBug(per_corrupt=4, model=char_model, scoring='beam',
                                        label_data_path=args.test_path, alphabet_path=args.alphabet_path)
accuracy, predicates_all, target_all = test_model(test_dataset_beam, char_model, limit=10)

HBox(children=(FloatProgress(value=0.0, max=7600.0), HTML(value='')))

Evaluation - loss: 2.176774  acc: 36.364%(4/11) fooling rate: 63.636
Label: [31m  2[0m	Prec: [32m  0.0[0m% (0/6)        Recall: [32m  0.0[0m% (0/1)        F-Score:   N/A[0m
Label: [31m  3[0m	Prec: [32m100.0[0m% (4/4)        Recall: [32m 40.0[0m% (4/10)       F-Score: [32m 57.1[0m%[0m


In [24]:
test_dataset_beam = WordBug(per_corrupt=4, model=char_model, scoring='greedy',
                                        label_data_path=args.test_path, alphabet_path=args.alphabet_path)
accuracy, predicates_all, target_all = test_model(test_dataset_beam, char_model, limit=10)

HBox(children=(FloatProgress(value=0.0, max=7600.0), HTML(value='')))

Evaluation - loss: 1.300553  acc: 45.455%(5/11) fooling rate: 54.545
Label: [31m  2[0m	Prec: [32m  0.0[0m% (0/5)        Recall: [32m  0.0[0m% (0/1)        F-Score:   N/A[0m
Label: [31m  3[0m	Prec: [32m100.0[0m% (5/5)        Recall: [32m 50.0[0m% (5/10)       F-Score: [32m 66.7[0m%[0m
