In [None]:
import argparse
import json
import math
import os
import random
import time
from subprocess import check_output

import numpy as np
import torch

from utils_test import save_data, predict_boundary_test, predict_number_test, label_res14_xml, label_res15_xml, label_res16_xml, label_lap_xml
from utils_train import generate_idx_word
from generate_boundary_pred_label import return_predicted_boundary_test_label, generate_boundary_test_data, load_data_test, squad_test_data, save_test
from generate_number_pred_label import generate_number_test_data, json_to_csv, return_predicted_number_test_label

In [None]:
# Seed every random number generator this notebook touches with the same
# value. `seed` stays a module-level name because `evaluate` writes it to the log.
seed = 1337
for _reseed in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _reseed(seed)
del _reseed

In [None]:
class Model(torch.nn.Module):
    """CNN sequence tagger over two concatenated, frozen word embeddings.

    Each token id is looked up in both embedding tables; the two vectors are
    concatenated and passed through a stack of 1-D convolutions followed by a
    per-token linear layer producing `num_classes` logits. The output layer is
    either a softmax or, when `crf` is true, an AllenNLP conditional random field.
    """

    def __init__(self, gen_emb, domain_emb, num_classes=3, dropout=0.5, crf=True):
        """Build the network.

        Args:
            gen_emb: 2-D numpy array (vocab size x dim) used as the first
                embedding table. It is wrapped with `torch.from_numpy`, so its
                dtype must match the dtype of the convolution weights.
            domain_emb: 2-D numpy array used as the second embedding table.
                Both tables are indexed with the same token ids.
            num_classes: number of output labels per token.
            dropout: dropout probability applied before every convolution.
            crf: if true, decode/train with a CRF layer instead of softmax.
        """
        super(Model, self).__init__()
        # Both embedding tables are frozen (requires_grad=False).
        self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
        self.gen_embedding.weight = torch.nn.Parameter(torch.from_numpy(gen_emb), requires_grad=False)
        self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
        self.domain_embedding.weight = torch.nn.Parameter(torch.from_numpy(domain_emb), requires_grad=False)

        # forward() concatenates both embeddings along the feature axis, so the
        # first convolutions must accept the summed width, not just gen_emb's.
        emb_dim = gen_emb.shape[1] + domain_emb.shape[1]
        self.conv1 = torch.nn.Conv1d(emb_dim, 128, 5, padding=2)
        self.conv2 = torch.nn.Conv1d(emb_dim, 128, 3, padding=1)
        self.dropout = torch.nn.Dropout(dropout)

        self.conv3 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv4 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv5 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.linear_ae = torch.nn.Linear(256, num_classes)
        self.crf_flag = crf
        if self.crf_flag:
            # Imported lazily so the softmax variant works without AllenNLP installed.
            from allennlp.modules import ConditionalRandomField
            self.crf = ConditionalRandomField(num_classes)

    def forward(self, x, x_len, x_mask, x_tag=None, testing=True):
        """Run the tagger.

        Args:
            x: (batch, seq_len) tensor of token ids.
            x_len: sequence lengths; only used by the softmax training branch,
                where it is handed to `pack_padded_sequence` (which expects
                the batch sorted by decreasing length).
            x_mask: (batch, seq_len) mask; only used by the CRF branches.
            x_tag: gold labels, required when `testing` is false. The softmax
                branch reads `x_tag.data`, so it expects the labels packed the
                same way as the logits.
            testing: true for decoding, false to compute the training loss.

        Returns:
            testing + CRF: the result of `crf.viterbi_tags`.
            testing + softmax: (batch, seq_len, num_classes) log-probabilities.
            training + CRF: the negated CRF output.
            training + softmax: NLL loss over the non-padded tokens.
        """
        relu = torch.nn.functional.relu
        log_softmax = torch.nn.functional.log_softmax

        x_emb = torch.cat((self.gen_embedding(x), self.domain_embedding(x)), dim=2)
        # Conv1d wants (batch, channels, seq_len).
        x_emb = self.dropout(x_emb).transpose(1, 2)
        x_conv = relu(torch.cat((self.conv1(x_emb), self.conv2(x_emb)), dim=1))

        for conv in (self.conv3, self.conv4, self.conv5):
            x_conv = relu(conv(self.dropout(x_conv)))

        # Back to (batch, seq_len, channels) for the per-token classifier.
        x_logit = self.linear_ae(x_conv.transpose(1, 2))

        if testing:
            if self.crf_flag:
                return self.crf.viterbi_tags(x_logit, x_mask)
            # Normalise over the class axis explicitly instead of relying on
            # the deprecated implicit-dim rule plus a transpose round trip.
            return log_softmax(x_logit, dim=2)

        if self.crf_flag:
            return -self.crf(x_logit, x_tag, x_mask)

        # Packing drops padded positions; .data is (num_tokens, num_classes).
        packed_logit = torch.nn.utils.rnn.pack_padded_sequence(x_logit, x_len, batch_first=True)
        return torch.nn.functional.nll_loss(log_softmax(packed_logit.data, dim=1), x_tag.data)

In [None]:
def _token_label_rows(test_X, test_y, pred_y, idx_word):
    """Build, per sentence, the list of "word gold pred" strings handed to `save_data`."""
    rows = []
    for sent_idx in range(len(test_X)):
        # Id 0 is padding and has no word.
        words = [idx_word[w] for w in test_X[sent_idx] if w != 0]
        # zip stops at the shortest input, so labels past the last real word are dropped.
        rows.append([
            " ".join([word, str(gold), str(pred)])
            for word, gold, pred in zip(words, test_y[sent_idx], pred_y[sent_idx])
        ])
    return rows


def _run_scorer(argv, key_value_output):
    """Run the external scorer and pull three floats out of its whitespace-split output."""
    acc = check_output(argv).split()
    print(acc)
    if key_value_output:
        # Tokens 7-9 carry a fixed-width prefix (4, 4 and 10 characters) before the number.
        return float(acc[7][4:]), float(acc[8][4:]), float(acc[9][10:])
    return float(acc[9]), float(acc[12]), float(acc[15])


def test(model, test_X, test_y, raw_X, domain, command, template, run_epoch, boundary_process, crf, generate_data, batch_size, num_process):
    """Tag the test set with `model`, optionally post-process the tags, and score them.

    Pipeline: batched inference -> optional dump of "word gold pred" rows via
    `save_data` -> optional boundary step -> optional number step -> write the
    predictions into the XML template and run the external scorer.

    Args:
        model: module called as `model(x, x_len, x_mask, testing=True)`.
        test_X: 2-D integer array of token ids; id 0 is treated as padding.
        test_y: per-sentence gold labels; only written to the dump.
        raw_X: forwarded unchanged to the `label_*_xml` helper.
        domain: 'restaurant', 'laptop', 'restaurant14' or 'restaurant15'.
        command: scorer command line as one whitespace-separated string. The
            token at a domain-specific position (6, 4, 4, 5) is passed to the
            `label_*_xml` helper; in the command strings built by this file
            that token is the prediction XML path.
        template: template path forwarded to the `label_*_xml` helper.
        run_epoch: run index forwarded to the data-generation helpers.
        boundary_process: run the boundary post-processing helpers.
        crf: whether `model` decodes with a CRF (changes the output format).
        generate_data: dump the raw predictions with `save_data` first.
        batch_size: number of sentences per forward pass.
        num_process: run the number post-processing helpers.

    Returns:
        Three floats parsed from the scorer output; `evaluate` treats them as
        precision, recall and F1.

    Raises:
        ValueError: if `domain` is not one of the supported values.
    """
    pred_y = np.zeros((test_X.shape[0], test_X.shape[1]), np.int16)
    idx_word = generate_idx_word(domain)
    model.eval()

    # Feed inputs to wherever the model lives instead of assuming a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    with torch.no_grad():  # inference only: no autograd graph needed
        for offset in range(0, test_X.shape[0], batch_size):
            batch = test_X[offset:offset + batch_size]
            batch_len = np.sum(batch != 0, axis=1)
            # Sort the batch by decreasing length; undone below via `restore`.
            order = batch_len.argsort()[::-1]
            batch_len = batch_len[order]
            batch_mask = torch.from_numpy((batch != 0)[order].astype(np.uint8)).long().to(device)
            batch_X = torch.from_numpy(batch[order]).long().to(device)
            batch_pred = model(batch_X, batch_len, batch_mask, testing=True)
            restore = order.argsort()  # inverse permutation of `order`
            if crf:
                # Each CRF result is indexed as result[0] -> tag sequence.
                for row, sorted_pos in enumerate(restore):
                    tags = batch_pred[sorted_pos][0]
                    for col in range(len(tags)):
                        pred_y[offset + row, col] = tags[col]
            else:
                batch_labels = batch_pred.data.cpu().numpy().argmax(axis=2)[restore]
                pred_y[offset:offset + batch_size, :batch_labels.shape[1]] = batch_labels
    assert len(pred_y) == len(test_X)

    if generate_data:
        save_data(domain, _token_label_rows(test_X, test_y, pred_y, idx_word), run_epoch)

    if boundary_process:
        generate_boundary_test_data(domain, run_epoch)
        predict_boundary_test(domain, run_epoch)
        pred_y = return_predicted_boundary_test_label(domain, run_epoch)

    if num_process:
        if boundary_process:
            # The boundary step replaced pred_y, so dump the updated labels
            # again before the number step runs.
            save_data(domain, _token_label_rows(test_X, test_y, pred_y, idx_word), run_epoch)
        generate_number_test_data(domain, run_epoch)
        json_to_csv(domain, run_epoch)
        predict_number_test(domain, run_epoch)
        pred_y = return_predicted_number_test_label(domain, run_epoch, pred_y)

    argv = command.split()
    if domain == 'restaurant':
        label_res16_xml(template, argv[6], raw_X, pred_y)
        return _run_scorer(argv, key_value_output=True)
    if domain == 'laptop':
        label_lap_xml(template, argv[4], raw_X, pred_y)
        return _run_scorer(argv, key_value_output=False)
    if domain == 'restaurant14':
        label_res14_xml(template, argv[4], raw_X, pred_y)
        return _run_scorer(argv, key_value_output=False)
    if domain == 'restaurant15':
        label_res15_xml(template, argv[5], raw_X, pred_y)
        return _run_scorer(argv, key_value_output=True)
    # Previously fell through and returned None, which callers then failed to unpack.
    raise ValueError("unsupported domain: %r" % (domain,))

In [None]:
def evaluate(runs, data_dir, model_dir, domain, boundary_process, crf, generate_data, batch_size, num_process, command, template):
    """Score `runs` saved models on the test set and report the averaged metrics.

    For each run index r the model stored at `model_dir + domain + str(r)` is
    loaded and passed to `test`. The three values returned per run are
    averaged, printed as P / R / F1, and the settings plus the averaged third
    value are appended to 'evaluate_log/DECNN/evaluate.txt'.

    Args:
        runs: number of saved models to evaluate; must be at least 1.
        data_dir: directory prefix (including the trailing separator) holding
            `<domain>.npz` and `<domain>_raw_test.json`.
        model_dir: directory prefix (including the trailing separator) of the
            saved models.
        domain, boundary_process, crf, generate_data, batch_size, num_process,
        command, template: forwarded unchanged to `test`.

    Raises:
        ValueError: if `runs` is smaller than 1.
    """
    if runs < 1:
        # Averaging zero runs would otherwise end in a ZeroDivisionError
        # after a partial log entry had already been written.
        raise ValueError("runs must be at least 1, got %r" % (runs,))

    # Read both arrays once and close the archive, instead of re-reading them
    # from the .npz file on every run.
    with np.load(data_dir + domain + ".npz") as ae_data:
        test_X = ae_data['test_X']
        test_y = ae_data['test_y']
    with open(data_dir + domain + "_raw_test.json") as f:
        raw_X = json.load(f)

    # Make sure the log directory exists before the expensive runs start.
    log_path = 'evaluate_log/DECNN/evaluate.txt'
    os.makedirs(os.path.dirname(log_path), exist_ok=True)

    # Lets checkpoints load on a CPU-only host; None keeps the saved device.
    map_location = None if torch.cuda.is_available() else 'cpu'

    results = []
    precisions = []
    recalls = []
    for r in range(runs):
        # NOTE(review): torch.load unpickles arbitrary objects - only load
        # checkpoints from a trusted source. Recent PyTorch releases default to
        # weights_only=True, which rejects whole-module pickles; pass
        # weights_only=False here if your version requires it.
        model = torch.load(model_dir + domain + str(r), map_location=map_location)
        precision, recall, result = test(model, test_X, test_y, raw_X, domain, command, template, r, boundary_process, crf, generate_data, batch_size, num_process)
        precisions.append(precision)
        recalls.append(recall)
        results.append(result)

    mean_result = sum(results) / len(results)
    with open(log_path, 'a') as log:
        log.write(str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) + '\n')
        log.write('num_process: ' + str(num_process) + '\n')
        log.write('boundary_process: ' + str(boundary_process) + '\n')
        log.write('seed is: ' + str(seed) + '\n')
        log.write('batch_size is: ' + str(batch_size) + '\n')
        log.write('domain is: ' + domain + '\n')
        log.write('result is:' + str(mean_result) + '\n' + '\n')
    print('P:', sum(precisions) / len(precisions))
    print('R:', sum(recalls) / len(recalls))
    print('F1:', mean_result)

In [None]:
def str2bool(value):
    """Parse a command-line boolean.

    argparse's `type=bool` calls `bool()` on the raw string, so every
    non-empty value - including "False" - becomes True. This converter
    accepts true/t/yes/y/1 and false/f/no/n/0 (case-insensitive). The empty
    string stays False, as it was under `type=bool`.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


if __name__ == "__main__":
    # Scorer command line and XML template for every supported domain.
    # `test` picks the prediction-file token out of the command by position,
    # so the token order of these strings must not change.
    eval_setup = {
        'restaurant': (
            "java -cp script/A.jar absa16.Do Eval -prd data/official_data/pred.xml -gld data/official_data/EN_REST_SB1_TEST.xml.gold -evs 2 -phs A -sbt SB1",
            "data/official_data/EN_REST_SB1_TEST.xml.A",
        ),
        'laptop': (
            "java -cp script/eval.jar Main.Aspects data/official_data/pred.xml data/official_data/Laptops_Test_Gold.xml",
            "data/official_data/Laptops_Test_Data_PhaseA.xml",
        ),
        'restaurant14': (
            "java -cp script/eval.jar Main.Aspects data/official_data/pred.xml data/official_data/Restaurants_Test_Gold.xml",
            "data/official_data/Restaurants_Test_Data_PhaseA.xml",
        ),
        'restaurant15': (
            "java -cp script/A.jar absa15.Do Eval data/official_data/pred.xml data/official_data/ABSA15_Restaurants_Test.xml 2 0",
            "data/official_data/ABSA15_Restaurants_Test_template.xml",
        ),
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default="model/DECNN/")
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--runs', type=int, default=5)
    # One of [laptop], [restaurant], [restaurant14], [restaurant15].
    # Restricting the choices turns a mistyped domain into a clear argparse
    # error instead of a NameError on `command` further down.
    parser.add_argument('--domain', type=str, default="laptop", choices=sorted(eval_setup))
    # If --domain is [laptop] or [restaurant], set this to [data/prep_data/];
    # otherwise set it to [data/prep_data_15/].
    parser.add_argument('--data_dir', type=str, default="data/prep_data/")
    # Replace the softmax layer with a CRF layer.
    parser.add_argument('--crf', type=str2bool, default=False)
    parser.add_argument('--generate_data', type=str2bool, default=True)
    # Aspect Number Determining module.
    parser.add_argument('--num_process', type=str2bool, default=True)
    # Aspect Boundary Modifying module.
    parser.add_argument('--boundary_process', type=str2bool, default=True)
    # parse_known_args ignores the extra arguments a Jupyter kernel passes.
    args = parser.parse_known_args()[0]

    command, template = eval_setup[args.domain]
    evaluate(args.runs, args.data_dir, args.model_dir, args.domain, args.boundary_process, args.crf, args.generate_data, args.batch_size, args.num_process, command, template)