In [1]:
import argparse
import torch
import time
import json
import numpy as np
import math
import random
import xml.etree.ElementTree as ET
from config import flags
from subprocess import check_output

from utils_test import save_data, predict_boundary_test, predict_number_test, label_res14_xml, label_res15_xml, label_res16_xml, label_lap_xml
from utils_train import generate_idx_word
from generate_boundary_pred_label import return_predicted_boundary_test_label, generate_boundary_test_data, load_data_test, squad_test_data, save_test
from generate_number_pred_label import generate_number_test_data, json_to_csv, return_predicted_number_test_label

Using TensorFlow backend.


In [2]:
# One fixed seed shared by the notebook (`seed`) and the project config
# (`flags.seed`), then applied to every RNG this notebook imports.
seed = flags.seed = 1337
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)

In [3]:
def _domain_eval_spec(domain):
    """Return ``(xml_labeler, pred_path_index, score_parser)`` for ``domain``.

    ``pred_path_index`` is the position of the prediction XML path inside the
    whitespace-split scorer command, and ``score_parser`` extracts the score
    from the whitespace-split scorer output.

    Raises:
        ValueError: if ``domain`` is not one of the supported domains.
    """
    def _score_from_token_9(tokens):
        # The score is embedded in token 9 after a 10-character prefix.
        # NOTE(review): prefix layout is dictated by the external scorer jar — confirm.
        return float(tokens[9][10:])

    def _score_from_token_15(tokens):
        # In the captured laptop output, token 15 is the value following b'F:'.
        return float(tokens[15])

    specs = {
        'restaurant': (label_res16_xml, 6, _score_from_token_9),
        'laptop': (label_lap_xml, 4, _score_from_token_15),
        'restaurant14': (label_res14_xml, 4, _score_from_token_15),
        'restaurant15': (label_res15_xml, 5, _score_from_token_9),
    }
    if domain not in specs:
        raise ValueError(
            "unsupported domain %r; expected one of %s" % (domain, sorted(specs))
        )
    return specs[domain]


def _format_token_rows(test_X, test_y, pred_y, idx_word):
    """Build, per sentence, a list of ``"word gold pred"`` strings.

    Token id 0 is skipped (it is treated as padding throughout this file);
    the remaining words are paired positionally with the gold and predicted
    labels, stopping at the shortest of the three sequences.
    """
    results = []
    for j_th in range(len(test_X)):
        words = [idx_word[w] for w in test_X[j_th] if w != 0]
        results.append([
            " ".join([word, str(gold), str(pred)])
            for word, gold, pred in zip(words, test_y[j_th], pred_y[j_th])
        ])
    return results


def test(model, test_X, test_y, raw_X, domain, command, template, run_epoch, boundary_process, crf, generate_data, batch_size, num_process):
    """Run ``model`` over the test set, post-process, and score with the external scorer.

    Args:
        model: callable as ``model(batch_X, batch_len, testing=True)`` returning
            ``(predictions, attention_weight)``; put into eval mode here.
        test_X: 2-D integer array of token ids, 0 meaning padding.
        test_y: per-sentence gold labels, indexable like ``test_X``.
        raw_X: raw test sentences forwarded to the XML labeler.
        domain: one of 'restaurant', 'laptop', 'restaurant14', 'restaurant15'.
        command: scorer command line as a single string (split on whitespace).
        template: path of the XML template handed to the XML labeler.
        run_epoch: run identifier forwarded to the save/post-process helpers.
        boundary_process: if true, replace predictions with the boundary
            post-processor's output.
        crf: if true, ``predictions`` is a per-sentence sequence of tag ids;
            otherwise it is a score tensor that is arg-maxed over axis 2.
        generate_data: if true, dump "word gold pred" rows via ``save_data``.
        batch_size: number of sentences per forward pass.
        num_process: if true, run the number post-processor on the predictions.

    Returns:
        float: the score parsed from the scorer's standard output.

    Raises:
        ValueError: if ``domain`` is not supported (checked before any work).
    """
    # Validate up front so an unknown domain fails before inference instead of
    # silently returning None at the very end.
    write_xml, pred_path_idx, parse_score = _domain_eval_spec(domain)

    pred_y = np.zeros((test_X.shape[0], test_X.shape[1]), np.int16)
    idx_word = generate_idx_word(domain)
    model.eval()
    # Inference only: no autograd graph is needed for any batch.
    with torch.no_grad():
        for offset in range(0, test_X.shape[0], batch_size):
            batch = test_X[offset:offset + batch_size]
            batch_test_X_len = np.sum(batch != 0, axis=1)
            # Feed the batch sorted by descending length; r_idx undoes the sort.
            batch_idx = batch_test_X_len.argsort()[::-1]
            batch_test_X_len = batch_test_X_len[batch_idx]
            batch_test_X = torch.from_numpy(batch[batch_idx]).long().cuda()

            batch_pred_y, _ = model(batch_test_X, batch_test_X_len, testing=True)
            r_idx = batch_idx.argsort()
            if crf:
                batch_pred_y = [batch_pred_y[idx] for idx in r_idx]
                for ix, tags in enumerate(batch_pred_y):
                    for jx, tag in enumerate(tags):
                        pred_y[offset + ix, jx] = tag
            else:
                batch_pred_y = batch_pred_y.data.cpu().numpy().argmax(axis=2)[r_idx]
                pred_y[offset:offset + batch_size, :batch_pred_y.shape[1]] = batch_pred_y
    assert len(pred_y) == len(test_X)

    if generate_data:
        save_data(domain, _format_token_rows(test_X, test_y, pred_y, idx_word), run_epoch)

    if boundary_process:
        generate_boundary_test_data(domain, run_epoch)
        predict_boundary_test(domain, run_epoch)
        pred_y = return_predicted_boundary_test_label(domain, run_epoch)

    if num_process:
        if boundary_process:
            # Re-dump the rows so the number step sees the boundary-corrected labels.
            save_data(domain, _format_token_rows(test_X, test_y, pred_y, idx_word), run_epoch)
        generate_number_test_data(domain, run_epoch)
        json_to_csv(domain, run_epoch)
        predict_number_test(domain, run_epoch)
        pred_y = return_predicted_number_test_label(domain, run_epoch, pred_y)

    argv = command.split()
    # Write predictions to the XML file the scorer command reads, then score.
    write_xml(template, argv[pred_path_idx], raw_X, pred_y)
    acc = check_output(argv).split()
    print(acc)
    return parse_score(acc)

In [4]:
def evaluate(runs, data_dir, model_dir, domain, boundary_process, crf, generate_data, batch_size, num_process, command, template):
    """Score ``runs`` saved models on the test set and log/print the mean result.

    Loads ``<data_dir><domain>.npz`` (arrays ``test_X`` and ``test_y``) and
    ``<data_dir><domain>_raw_test.json``, evaluates the models stored at
    ``<model_dir><domain><r>`` for ``r`` in ``range(runs)`` with ``test``,
    appends a summary to ``evaluate_log/Seq2Seq/evaluate.txt`` and prints the
    mean score. The logged seed is read from the module-level ``seed``.

    Raises:
        ValueError: if ``runs`` is smaller than 1 (there would be nothing to
            average).
    """
    import os

    if runs < 1:
        raise ValueError("runs must be at least 1, got " + str(runs))

    # Read both arrays once and close the archive; indexing an open NpzFile
    # re-reads the member from disk on every access.
    with np.load(data_dir+domain+".npz") as ae_data:
        test_X = ae_data['test_X']
        test_y = ae_data['test_y']
    with open(data_dir+domain+"_raw_test.json") as f:
        raw_X = json.load(f)

    # Make sure the log destination exists before the expensive evaluation
    # runs, so results are not lost to a missing directory at the very end.
    log_path = 'evaluate_log/Seq2Seq/evaluate.txt'
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

    results = []
    for r in range(runs):
        # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
        model = torch.load(model_dir+domain+str(r))
        results.append(test(model, test_X, test_y, raw_X, domain, command, template, r, boundary_process, crf, generate_data, batch_size, num_process))

    mean_result = sum(results)/len(results)
    with open(log_path, 'a') as log:
        log.write(str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + '\n')
        log.write('num_process: ' + str(num_process) + '\n')
        log.write('boundary_process: ' + str(boundary_process) + '\n')
        log.write('seed is: ' + str(seed) + '\n')
        log.write('batch_size is: ' + str(batch_size) + '\n')
        log.write('domain is: ' + domain + '\n')
        log.write('result is:' + str(mean_result) + '\n' + '\n')
    print(mean_result)

In [5]:
def str2bool(value):
    """Parse a command-line boolean.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so any
    non-empty value — including "False" — becomes True. This converter accepts
    the usual spellings instead.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


if __name__ == "__main__":
    # domain -> (scorer command line, XML template path)
    domain_eval = {
        'restaurant': (
            "java -cp script/A.jar absa16.Do Eval -prd data/official_data/pred.xml -gld data/official_data/EN_REST_SB1_TEST.xml.gold -evs 2 -phs A -sbt SB1",
            "data/official_data/EN_REST_SB1_TEST.xml.A",
        ),
        'laptop': (
            "java -cp script/eval.jar Main.Aspects data/official_data/pred.xml data/official_data/Laptops_Test_Gold.xml",
            "data/official_data/Laptops_Test_Data_PhaseA.xml",
        ),
        'restaurant14': (
            "java -cp script/eval.jar Main.Aspects data/official_data/pred.xml data/official_data/Restaurants_Test_Gold.xml",
            "data/official_data/Restaurants_Test_Data_PhaseA.xml",
        ),
        'restaurant15': (
            "java -cp script/A.jar absa15.Do Eval data/official_data/pred.xml data/official_data/ABSA15_Restaurants_Test.xml 2 0",
            "data/official_data/ABSA15_Restaurants_Test_template.xml",
        ),
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--runs', type=int, default=5)
    parser.add_argument('--data_dir', type=str, default="data/prep_data/")
    parser.add_argument('--model_dir', type=str, default="model/Seq2Seq/")
    # Restricting the choices turns an unsupported domain into a clear usage
    # error instead of a NameError on the undefined command/template.
    parser.add_argument('--domain', type=str, default="laptop", choices=sorted(domain_eval))
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--crf', type=str2bool, default=False)
    parser.add_argument('--generate_data', type=str2bool, default=True)
    parser.add_argument('--num_process', type=str2bool, default=True)
    parser.add_argument('--boundary_process', type=str2bool, default=True)
    # parse_known_args tolerates the extra arguments a notebook kernel passes.
    args = parser.parse_known_args()[0]

    command, template = domain_eval[args.domain]
    evaluate(args.runs, args.data_dir, args.model_dir, args.domain, args.boundary_process, args.crf, args.generate_data, args.batch_size, args.num_process, command, template)

[b'Aspects', b'--------------------------------------', b'#System', b'Aspect', b'Terms=404', b'#Gold', b'Aspect', b'Terms=654', b'Pre:', b'0.9207921', b'(372/404)', b'Rec:', b'0.56880736', b'(372/654)', b'F:', b'0.70321363', b'Categories', b'--------------------------------------', b'#System', b'Aspect', b'Categories=0', b'#Gold', b'Aspect', b'Categories=0', b'Pre:', b'NaN', b'(0/0)', b'Rec:', b'NaN', b'(0/0)', b'F:', b'NaN']
[b'Aspects', b'--------------------------------------', b'#System', b'Aspect', b'Terms=413', b'#Gold', b'Aspect', b'Terms=654', b'Pre:', b'0.9128329', b'(377/413)', b'Rec:', b'0.5764526', b'(377/654)', b'F:', b'0.70665425', b'Categories', b'--------------------------------------', b'#System', b'Aspect', b'Categories=0', b'#Gold', b'Aspect', b'Categories=0', b'Pre:', b'NaN', b'(0/0)', b'Rec:', b'NaN', b'(0/0)', b'F:', b'NaN']
[b'Aspects', b'--------------------------------------', b'#System', b'Aspect', b'Terms=467', b'#Gold', b'Aspect', b'Terms=654', b'Pre:', b'0.

In [6]:
0.6780045620000001

0.725917508

0.725917508