In [1]:
import logging
import math
import os
import sys
from time import strftime, localtime
import random
import numpy
from sklearn import metrics
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from data_utils import build_tokenizer, build_embedding_matrix, ABSADataset
from layers.dynamic_rnn import DynamicLSTM

logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(sys.stdout))

In [2]:
class LSTM(nn.Module):
    def __init__(self, embedding_matrix, opt):
        super(LSTM, self).__init__()
        self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1, batch_first=True)
        self.dense = nn.Linear(opt.hidden_dim, opt.polarities_dim)

    def forward(self, inputs):
        text_raw_indices = inputs[0]
        x = self.embed(text_raw_indices)
        x_len = torch.sum(text_raw_indices != 0, dim=-1)
        _, (h_n, _) = self.lstm(x, x_len)
        out = self.dense(h_n[0])
        return out

In [3]:
class LocationEncoding(nn.Module):
    def __init__(self, opt):
        super(LocationEncoding, self).__init__()
        self.opt = opt

    def forward(self, x, pos_inx):
        batch_size, seq_len = x.size()[0], x.size()[1]
        weight = self.weight_matrix(pos_inx, batch_size, seq_len).to(self.opt.device)
        x = weight.unsqueeze(2) * x
        return x

    def weight_matrix(self, pos_inx, batch_size, seq_len):
        pos_inx = pos_inx.cpu().numpy()
        weight = [[] for i in range(batch_size)]
        for i in range(batch_size):
            for j in range(pos_inx[i][0]):
                relative_pos = pos_inx[i][0] - j
                aspect_len = pos_inx[i][1] - pos_inx[i][0] + 1
                sentence_len = seq_len - aspect_len
                weight[i].append(1 - relative_pos / sentence_len)
            for j in range(pos_inx[i][0], pos_inx[i][1] + 1):
                weight[i].append(0)
            for j in range(pos_inx[i][1] + 1, seq_len):
                relative_pos = j - pos_inx[i][1]
                aspect_len = pos_inx[i][1] - pos_inx[i][0] + 1
                sentence_len = seq_len - aspect_len
                weight[i].append(1 - relative_pos / sentence_len)
        weight = torch.tensor(weight)
        return weight

class AlignmentMatrix(nn.Module):
    def __init__(self, opt):
        super(AlignmentMatrix, self).__init__()
        self.opt = opt
        self.w_u = nn.Parameter(torch.Tensor(6*opt.hidden_dim, 1))

    def forward(self, batch_size, ctx, asp):
        ctx_len = ctx.size(1)
        asp_len = asp.size(1)
        alignment_mat = torch.zeros(batch_size, ctx_len, asp_len).to(self.opt.device)
        ctx_chunks = ctx.chunk(ctx_len, dim=1)
        asp_chunks = asp.chunk(asp_len, dim=1)
        for i, ctx_chunk in enumerate(ctx_chunks):
            for j, asp_chunk in enumerate(asp_chunks):
                feat = torch.cat([ctx_chunk, asp_chunk, ctx_chunk*asp_chunk], dim=2) # batch_size x 1 x 6*hidden_dim 
                alignment_mat[:, i, j] = feat.matmul(self.w_u.expand(batch_size, -1, -1)).squeeze(-1).squeeze(-1) 
        return alignment_mat

class MGAN(nn.Module):
    def __init__(self, embedding_matrix, opt):
        super(MGAN, self).__init__()
        self.opt = opt
        self.embed = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype=torch.float))
        self.ctx_lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1, batch_first=True, bidirectional=True)
        self.asp_lstm = DynamicLSTM(opt.embed_dim, opt.hidden_dim, num_layers=1, batch_first=True, bidirectional=True)
        self.location = LocationEncoding(opt)
        self.w_a2c = nn.Parameter(torch.Tensor(2*opt.hidden_dim, 2*opt.hidden_dim))
        self.w_c2a = nn.Parameter(torch.Tensor(2*opt.hidden_dim, 2*opt.hidden_dim))
        self.alignment = AlignmentMatrix(opt)
        self.dense = nn.Linear(8*opt.hidden_dim, opt.polarities_dim)

    def forward(self, inputs):
        text_raw_indices = inputs[0] # batch_size x seq_len
        aspect_indices = inputs[1] 
        text_left_indices= inputs[2]
        batch_size = text_raw_indices.size(0)
        ctx_len = torch.sum(text_raw_indices != 0, dim=1)
        asp_len = torch.sum(aspect_indices != 0, dim=1)
        left_len = torch.sum(text_left_indices != 0, dim=-1)
        aspect_in_text = torch.cat([left_len.unsqueeze(-1), (left_len+asp_len-1).unsqueeze(-1)], dim=-1)

        ctx = self.embed(text_raw_indices) # batch_size x seq_len x embed_dim
        asp = self.embed(aspect_indices) # batch_size x seq_len x embed_dim

        ctx_out, (_, _) = self.ctx_lstm(ctx, ctx_len) 
        ctx_out = self.location(ctx_out, aspect_in_text) # batch_size x (ctx)seq_len x 2*hidden_dim
        ctx_pool = torch.sum(ctx_out, dim=1)
        ctx_pool = torch.div(ctx_pool, ctx_len.float().unsqueeze(-1)).unsqueeze(-1) # batch_size x 2*hidden_dim x 1

        asp_out, (_, _) = self.asp_lstm(asp, asp_len) # batch_size x (asp)seq_len x 2*hidden_dim
        asp_pool = torch.sum(asp_out, dim=1)
        asp_pool = torch.div(asp_pool, asp_len.float().unsqueeze(-1)).unsqueeze(-1) # batch_size x 2*hidden_dim x 1

        alignment_mat = self.alignment(batch_size, ctx_out, asp_out) # batch_size x (ctx)seq_len x (asp)seq_len
        # batch_size x 2*hidden_dim
        f_asp2ctx = torch.matmul(ctx_out.transpose(1, 2), F.softmax(alignment_mat.max(2, keepdim=True)[0], dim=1)).squeeze(-1)
        f_ctx2asp = torch.matmul(F.softmax(alignment_mat.max(1, keepdim=True)[0], dim=2), asp_out).transpose(1, 2).squeeze(-1) 

        c_asp2ctx_alpha = F.softmax(ctx_out.matmul(self.w_a2c.expand(batch_size, -1, -1)).matmul(asp_pool), dim=1)
        c_asp2ctx = torch.matmul(ctx_out.transpose(1, 2), c_asp2ctx_alpha).squeeze(-1)
        c_ctx2asp_alpha = F.softmax(asp_out.matmul(self.w_c2a.expand(batch_size, -1, -1)).matmul(ctx_pool), dim=1)
        c_ctx2asp = torch.matmul(asp_out.transpose(1, 2), c_ctx2asp_alpha).squeeze(-1)

        feat = torch.cat([c_asp2ctx, f_asp2ctx, f_ctx2asp, c_ctx2asp], dim=1)
        out = self.dense(feat) # bathc_size x polarity_dim

        return out

In [4]:
# -*- coding: utf-8 -*-
# file: train.py
# author: songyouwei <youwei0314@gmail.com>
# Copyright (C) 2018. All Rights Reserved.

class Instructor:
    def __init__(self, opt):
        self.opt = opt

        tokenizer = build_tokenizer(
            fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
            max_seq_len=opt.max_seq_len,
            dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
        embedding_matrix = build_embedding_matrix(
            word2idx=tokenizer.word2idx,
            embed_dim=opt.embed_dim,
            dat_fname='{0}_{1}_embedding_matrix.dat'.format(str(opt.embed_dim), opt.dataset))
        self.model = opt.model_class(embedding_matrix, opt).to(opt.device)
        self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
        self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)
        assert 0 <= opt.valset_ratio < 1
        if opt.valset_ratio > 0:
            valset_len = int(len(self.trainset) * opt.valset_ratio)
            self.trainset, self.valset = random_split(self.trainset, (len(self.trainset)-valset_len, valset_len))
        else:
            self.valset = self.testset

        if opt.device.type == 'cuda':
            logger.info('cuda memory allocated: {}'.format(torch.cuda.memory_allocated(device=opt.device.index)))
        self._print_args()

    def _print_args(self):
        n_trainable_params, n_nontrainable_params = 0, 0
        for p in self.model.parameters():
            n_params = torch.prod(torch.tensor(p.shape))
            if p.requires_grad:
                n_trainable_params += n_params
            else:
                n_nontrainable_params += n_params
        logger.info('> training parameters:')
        for arg in vars(self.opt):
            logger.info('> {0}: {1}'.format(arg, getattr(self.opt, arg)))

    def _reset_params(self):
        for child in self.model.children():
            for p in child.parameters():
                if p.requires_grad:
                    if len(p.shape) > 1:
                        self.opt.initializer(p)
                    else:
                        stdv = 1. / math.sqrt(p.shape[0])
                        torch.nn.init.uniform_(p, a=-stdv, b=stdv)

    def _train(self, criterion, optimizer, train_data_loader, val_data_loader):
        max_val_acc = 0
        max_val_f1 = 0
        global_step = 0
        path = None
        for epoch in range(self.opt.num_epoch):
            logger.info(' ' * 100)
            logger.info('epoch: {}'.format(epoch))
            n_correct, n_total, loss_total = 0, 0, 0
            # switch model to training mode
            self.model.train()
            for i_batch, sample_batched in enumerate(train_data_loader):
                global_step += 1
                # clear gradient accumulators
                optimizer.zero_grad()

                inputs = [sample_batched[col].to(self.opt.device) for col in self.opt.inputs_cols]
                outputs = self.model(inputs)
                targets = sample_batched['polarity'].to(self.opt.device)

                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                n_correct += (torch.argmax(outputs, -1) == targets).sum().item()
                n_total += len(outputs)
                loss_total += loss.item() * len(outputs)
                if global_step % self.opt.log_step == 0:
                    train_acc = n_correct / n_total
                    train_loss = loss_total / n_total
                    logger.info('loss: {:.4f}, acc: {:.4f}'.format(train_loss, train_acc))

            val_acc, val_f1 = self._evaluate_acc_f1(val_data_loader)
            logger.info('> val_acc: {:.4f}, val_f1: {:.4f}'.format(val_acc, val_f1))
            if val_acc > max_val_acc:
                max_val_acc = val_acc
                if not os.path.exists('state_dict'):
                    os.mkdir('state_dict')
                path = 'state_dict/{0}_{1}_val_acc{2}'.format(self.opt.model_name, self.opt.dataset, round(val_acc, 4))
                torch.save(self.model.state_dict(), path)
                logger.info('>> saved: {}'.format(path))
            if val_f1 > max_val_f1:
                max_val_f1 = val_f1

        return path

    def _evaluate_acc_f1(self, data_loader):
        n_correct, n_total = 0, 0
        t_targets_all, t_outputs_all = None, None
        # switch model to evaluation mode
        self.model.eval()
        with torch.no_grad():
            for t_batch, t_sample_batched in enumerate(data_loader):
                t_inputs = [t_sample_batched[col].to(self.opt.device) for col in self.opt.inputs_cols]
                t_targets = t_sample_batched['polarity'].to(self.opt.device)
                t_outputs = self.model(t_inputs)

                n_correct += (torch.argmax(t_outputs, -1) == t_targets).sum().item()
                n_total += len(t_outputs)

                if t_targets_all is None:
                    t_targets_all = t_targets
                    t_outputs_all = t_outputs
                else:
                    t_targets_all = torch.cat((t_targets_all, t_targets), dim=0)
                    t_outputs_all = torch.cat((t_outputs_all, t_outputs), dim=0)

        acc = n_correct / n_total
        f1 = metrics.f1_score(t_targets_all.cpu(), torch.argmax(t_outputs_all, -1).cpu(), labels=[0, 1, 2], average='macro')
        return acc, f1

    def run(self):
        # Loss and Optimizer
        criterion = nn.CrossEntropyLoss()
        _params = filter(lambda p: p.requires_grad, self.model.parameters())
        optimizer = self.opt.optimizer(_params, lr=self.opt.learning_rate, weight_decay=self.opt.l2reg)

        train_data_loader = DataLoader(dataset=self.trainset, batch_size=self.opt.batch_size, shuffle=True)
        test_data_loader = DataLoader(dataset=self.testset, batch_size=self.opt.batch_size, shuffle=False)
        val_data_loader = DataLoader(dataset=self.valset, batch_size=self.opt.batch_size, shuffle=False)

        self._reset_params()
        best_model_path = self._train(criterion, optimizer, train_data_loader, val_data_loader)
        self.model.load_state_dict(torch.load(best_model_path))
        self.model.eval()
        test_acc, test_f1 = self._evaluate_acc_f1(test_data_loader)
        logger.info('>> test_acc: {:.4f}, test_f1: {:.4f}'.format(test_acc, test_f1))

In [5]:
model_classes = {
    'mgan': MGAN,
    'lstm': LSTM,
}
dataset_files = {
    'twitter': {
        'train': './datasets/acl-14-short-data/train.raw',
        'test': './datasets/acl-14-short-data/test.raw'
    },
    'restaurant': {
        'train': './datasets/semeval14/Restaurants_Train.xml.seg',
        'test': './datasets/semeval14/Restaurants_Test_Gold.xml.seg'
    },
    'laptop': {
        'train': './datasets/semeval14/Laptops_Train.xml.seg',
        'test': './datasets/semeval14/Laptops_Test_Gold.xml.seg'
    }
}
input_colses = {
    'lstm': ['text_raw_indices'],
    'mgan': ['text_raw_indices', 'aspect_indices', 'text_left_indices'],
}
initializers = {
    'xavier_uniform_': torch.nn.init.xavier_uniform_,
}
optimizers = {
    'adagrad': torch.optim.Adagrad,  # default lr=0.01
    'adam': torch.optim.Adam,  # default lr=0.001
    'asgd': torch.optim.ASGD,  # default lr=0.01
    'sgd': torch.optim.SGD,
}


In [6]:
class Parameter:
    def __init__(self, model_class, dataset_file, inputs_cols, initializer, 
                 optimizer, model_name, dataset, learning_rate, num_epoch):
        self.model_class = model_class
        self.dataset_file = dataset_file
        self.inputs_cols = inputs_cols
        self.initializer = initializer
        self.optimizer = optimizer
        self.model_name = model_name
        self.dataset = dataset
        self.learning_rate = learning_rate
        self.num_epoch = num_epoch
        
        self.dropout = 0.1
        self.l2reg = 0.01
        self.batch_size = 64
        self.log_step = 5
        self.embed_dim = 300
        self.hidden_dim = 300
        self.max_seq_len = 80
        self.polarities_dim = 3
        self.valset_ratio = 0
        self.device = torch.device('cpu')

## LSTM - laptop dataset

In [7]:
model_name = 'lstm'
dataset = 'laptop' # twitter, laptop， restaurant
optimizer = 'adam'
initializer = 'xavier_uniform_'
learning_rate = 1e-3

log_file = '{}-{}-{}.log'.format(model_name, dataset, strftime("%y%m%d-%H%M", localtime()))
logger.addHandler(logging.FileHandler(log_file))

opt_lstm = Parameter(model_classes[model_name], dataset_files[dataset], input_colses[model_name], 
              initializers[initializer], optimizers[optimizer], model_name, dataset, learning_rate, 20)

ins = Instructor(opt_lstm)
ins.run()

loading tokenizer: laptop_tokenizer.dat
loading embedding_matrix: 300_laptop_embedding_matrix.dat
> training parameters:
> model_class: <class '__main__.LSTM'>
> dataset_file: {'train': './datasets/semeval14/Laptops_Train.xml.seg', 'test': './datasets/semeval14/Laptops_Test_Gold.xml.seg'}
> inputs_cols: ['text_raw_indices']
> initializer: <function xavier_uniform_ at 0x1a15a45b70>
> optimizer: <class 'torch.optim.adam.Adam'>
> model_name: lstm
> dataset: laptop
> learning_rate: 0.001
> num_epoch: 20
> dropout: 0.1
> l2reg: 0.01
> batch_size: 64
> log_step: 5
> embed_dim: 300
> hidden_dim: 300
> max_seq_len: 80
> polarities_dim: 3
> valset_ratio: 0
> hops: 3
> device: cpu
                                                                                                    
epoch: 0
loss: 1.0629, acc: 0.4906
loss: 1.0317, acc: 0.4906
loss: 1.0269, acc: 0.4906
loss: 1.0190, acc: 0.5008
loss: 1.0059, acc: 0.5069
loss: 0.9876, acc: 0.5234
loss: 0.9760, acc: 0.5362
> val_acc: 0.5878, val_f1: 0

  'precision', 'predicted', average, warn_for)


loss: 0.8096, acc: 0.6667
loss: 0.8166, acc: 0.6621
loss: 0.8093, acc: 0.6611
loss: 0.8063, acc: 0.6554
loss: 0.8071, acc: 0.6522
loss: 0.8058, acc: 0.6557
loss: 0.8041, acc: 0.6600
> val_acc: 0.6097, val_f1: 0.4695
>> saved: state_dict/lstm_laptop_val_acc0.6097
                                                                                                    
epoch: 2
loss: 0.7013, acc: 0.7188
loss: 0.7512, acc: 0.7031
loss: 0.7357, acc: 0.7102
loss: 0.7461, acc: 0.6963
loss: 0.7416, acc: 0.6994
loss: 0.7361, acc: 0.7031
loss: 0.7374, acc: 0.7036
loss: 0.7496, acc: 0.6984
> val_acc: 0.6630, val_f1: 0.5667
>> saved: state_dict/lstm_laptop_val_acc0.663
                                                                                                    
epoch: 3
loss: 0.7309, acc: 0.6914
loss: 0.7338, acc: 0.6875
loss: 0.7633, acc: 0.6786
loss: 0.7403, acc: 0.6957
loss: 0.7360, acc: 0.6979
loss: 0.7297, acc: 0.7010
loss: 0.7271, acc: 0.7045
> val_acc: 0.6176, val_f1: 0.4430
             

## LSTM - restaurant dataset

In [8]:
model_name = 'lstm'
dataset = 'restaurant' # twitter, laptop， restaurant
optimizer = 'adam'
initializer = 'xavier_uniform_'
learning_rate = 1e-3

log_file = '{}-{}-{}.log'.format(model_name, dataset, strftime("%y%m%d-%H%M", localtime()))
logger.addHandler(logging.FileHandler(log_file))

opt_lstm = Parameter(model_classes[model_name], dataset_files[dataset], input_colses[model_name], 
              initializers[initializer], optimizers[optimizer], model_name, dataset, learning_rate, 20)

ins = Instructor(opt_lstm)
ins.run()

loading tokenizer: restaurant_tokenizer.dat
loading embedding_matrix: 300_restaurant_embedding_matrix.dat
> training parameters:
> model_class: <class '__main__.LSTM'>
> dataset_file: {'train': './datasets/semeval14/Restaurants_Train.xml.seg', 'test': './datasets/semeval14/Restaurants_Test_Gold.xml.seg'}
> inputs_cols: ['text_raw_indices']
> initializer: <function xavier_uniform_ at 0x1a18039ae8>
> optimizer: <class 'torch.optim.adam.Adam'>
> model_name: lstm
> dataset: restaurant
> learning_rate: 0.001
> num_epoch: 20
> dropout: 0.1
> l2reg: 0.01
> batch_size: 64
> log_step: 5
> embed_dim: 300
> hidden_dim: 300
> max_seq_len: 80
> polarities_dim: 3
> valset_ratio: 0
> device: cpu
                                                                                                    
epoch: 0
loss: 0.9327, acc: 0.6469
loss: 0.9501, acc: 0.6234
loss: 0.9588, acc: 0.6219
loss: 0.9486, acc: 0.6219
loss: 0.9348, acc: 0.6219
loss: 0.9356, acc: 0.6146
loss: 0.9253, acc: 0.6188
loss: 0.9194, acc:

loss: 0.5457, acc: 0.7552
loss: 0.5585, acc: 0.7656
loss: 0.5793, acc: 0.7560
loss: 0.5877, acc: 0.7543
loss: 0.5973, acc: 0.7527
loss: 0.6105, acc: 0.7455
loss: 0.6024, acc: 0.7500
loss: 0.5981, acc: 0.7537
loss: 0.6043, acc: 0.7525
loss: 0.6071, acc: 0.7487
loss: 0.6066, acc: 0.7503
> val_acc: 0.7473, val_f1: 0.6219
                                                                                                    
epoch: 17
loss: 0.6716, acc: 0.6250
loss: 0.6070, acc: 0.7500
loss: 0.6067, acc: 0.7543
loss: 0.5896, acc: 0.7598
loss: 0.5867, acc: 0.7641
loss: 0.5836, acc: 0.7614
loss: 0.5855, acc: 0.7626
loss: 0.5856, acc: 0.7635
loss: 0.5871, acc: 0.7607
loss: 0.5934, acc: 0.7582
loss: 0.5910, acc: 0.7586
loss: 0.5890, acc: 0.7592
> val_acc: 0.7286, val_f1: 0.5719
                                                                                                    
epoch: 18
loss: 0.6038, acc: 0.7578
loss: 0.5579, acc: 0.7882
loss: 0.5532, acc: 0.7857
loss: 0.5553, acc: 0.7837
loss: 0.

## LSTM - twitter dataset

In [9]:
model_name = 'lstm'
dataset = 'twitter' # twitter, laptop， restaurant
optimizer = 'adam'
initializer = 'xavier_uniform_'
learning_rate = 1e-3

log_file = '{}-{}-{}.log'.format(model_name, dataset, strftime("%y%m%d-%H%M", localtime()))
logger.addHandler(logging.FileHandler(log_file))

opt_lstm = Parameter(model_classes[model_name], dataset_files[dataset], input_colses[model_name], 
              initializers[initializer], optimizers[optimizer], model_name, dataset, learning_rate, 20)

ins = Instructor(opt_lstm)
ins.run()

loading tokenizer: twitter_tokenizer.dat
loading embedding_matrix: 300_twitter_embedding_matrix.dat
> training parameters:
> model_class: <class '__main__.LSTM'>
> dataset_file: {'train': './datasets/acl-14-short-data/train.raw', 'test': './datasets/acl-14-short-data/test.raw'}
> inputs_cols: ['text_raw_indices']
> initializer: <function xavier_uniform_ at 0x1a18039ae8>
> optimizer: <class 'torch.optim.adam.Adam'>
> model_name: lstm
> dataset: twitter
> learning_rate: 0.001
> num_epoch: 20
> dropout: 0.1
> l2reg: 0.01
> batch_size: 64
> log_step: 5
> embed_dim: 300
> hidden_dim: 300
> max_seq_len: 80
> polarities_dim: 3
> valset_ratio: 0
> device: cpu
                                                                                                    
epoch: 0
loss: 1.0440, acc: 0.4719
loss: 1.0530, acc: 0.4719
loss: 1.0364, acc: 0.4948
loss: 1.0413, acc: 0.4914
loss: 1.0330, acc: 0.4950
loss: 1.0308, acc: 0.4927
loss: 1.0265, acc: 0.4942
loss: 1.0173, acc: 0.5012
loss: 1.0141, acc: 0.5

                                                                                                    
epoch: 11
loss: 0.6216, acc: 0.7578
loss: 0.6860, acc: 0.7366
loss: 0.7144, acc: 0.7083
loss: 0.7118, acc: 0.7050
loss: 0.7247, acc: 0.7010
loss: 0.7217, acc: 0.7008
loss: 0.7179, acc: 0.7017
loss: 0.7163, acc: 0.7023
loss: 0.7145, acc: 0.7024
loss: 0.7075, acc: 0.7058
loss: 0.7075, acc: 0.7043
loss: 0.7052, acc: 0.7053
loss: 0.7091, acc: 0.7021
loss: 0.7120, acc: 0.7001
loss: 0.7162, acc: 0.6986
loss: 0.7163, acc: 0.6991
loss: 0.7152, acc: 0.6997
loss: 0.7209, acc: 0.6952
loss: 0.7214, acc: 0.6948
loss: 0.7228, acc: 0.6933
> val_acc: 0.6575, val_f1: 0.6013
                                                                                                    
epoch: 12
loss: 0.7464, acc: 0.6836
loss: 0.7227, acc: 0.6823
loss: 0.7247, acc: 0.6830
loss: 0.7269, acc: 0.6891
loss: 0.7140, acc: 0.6960
loss: 0.7127, acc: 0.6977
loss: 0.7100, acc: 0.6958
loss: 0.7079, acc: 0.7003
loss: 0.7088, ac

## MGAN - laptop dataset

In [10]:
model_name = 'mgan'
dataset = 'laptop' # twitter, laptop， restaurant
optimizer = 'adam'
initializer = 'xavier_uniform_'
learning_rate = 1e-3

log_file = '{}-{}-{}.log'.format(model_name, dataset, strftime("%y%m%d-%H%M", localtime()))
logger.addHandler(logging.FileHandler(log_file))

opt_mgan = Parameter(model_classes[model_name], dataset_files[dataset], input_colses[model_name], 
              initializers[initializer], optimizers[optimizer], model_name, dataset, learning_rate, 20)

ins = Instructor(opt_mgan)
ins.run()

loading tokenizer: laptop_tokenizer.dat
loading embedding_matrix: 300_laptop_embedding_matrix.dat
> training parameters:
> model_class: <class '__main__.MGAN'>
> dataset_file: {'train': './datasets/semeval14/Laptops_Train.xml.seg', 'test': './datasets/semeval14/Laptops_Test_Gold.xml.seg'}
> inputs_cols: ['text_raw_indices', 'aspect_indices', 'text_left_indices']
> initializer: <function xavier_uniform_ at 0x1a18039ae8>
> optimizer: <class 'torch.optim.adam.Adam'>
> model_name: mgan
> dataset: laptop
> learning_rate: 0.001
> num_epoch: 20
> dropout: 0.1
> l2reg: 0.01
> batch_size: 64
> log_step: 5
> embed_dim: 300
> hidden_dim: 300
> max_seq_len: 80
> polarities_dim: 3
> valset_ratio: 0
> device: cpu
                                                                                                    
epoch: 0
loss: 1.0530, acc: 0.4125
loss: 1.0532, acc: 0.4266
loss: 1.0600, acc: 0.4229
loss: 1.0551, acc: 0.4414
loss: 1.0535, acc: 0.4412
loss: 1.0473, acc: 0.4490
loss: 1.0435, acc: 0.4576

  'precision', 'predicted', average, warn_for)


loss: 1.0009, acc: 0.5729
loss: 0.9812, acc: 0.5547
loss: 0.9715, acc: 0.5625
loss: 0.9562, acc: 0.5773
loss: 0.9310, acc: 0.5944
loss: 0.9242, acc: 0.5926
loss: 0.9167, acc: 0.5919
> val_acc: 0.6066, val_f1: 0.4426
>> saved: state_dict/mgan_laptop_val_acc0.6066
                                                                                                    
epoch: 2
loss: 0.7719, acc: 0.6719
loss: 0.7678, acc: 0.6719
loss: 0.7814, acc: 0.6619
loss: 0.7491, acc: 0.6826
loss: 0.7457, acc: 0.6838
loss: 0.7356, acc: 0.6887
loss: 0.7405, acc: 0.6875
loss: 0.7490, acc: 0.6801
> val_acc: 0.6536, val_f1: 0.5308
>> saved: state_dict/mgan_laptop_val_acc0.6536
                                                                                                    
epoch: 3
loss: 0.7129, acc: 0.6641
loss: 0.6879, acc: 0.7031
loss: 0.7064, acc: 0.6964
loss: 0.6876, acc: 0.7072
loss: 0.6949, acc: 0.7077
loss: 0.6899, acc: 0.7117
loss: 0.6977, acc: 0.7063
> val_acc: 0.6630, val_f1: 0.5363
>> saved: st

## MGAN - restaurant dataset

In [11]:
model_name = 'mgan'
dataset = 'restaurant' # twitter, laptop， restaurant
optimizer = 'adam'
initializer = 'xavier_uniform_'
learning_rate = 1e-3

log_file = '{}-{}-{}.log'.format(model_name, dataset, strftime("%y%m%d-%H%M", localtime()))
logger.addHandler(logging.FileHandler(log_file))

opt_mgan = Parameter(model_classes[model_name], dataset_files[dataset], input_colses[model_name], 
              initializers[initializer], optimizers[optimizer], model_name, dataset, learning_rate, 20)

ins = Instructor(opt_mgan)
ins.run()

loading tokenizer: restaurant_tokenizer.dat
loading embedding_matrix: 300_restaurant_embedding_matrix.dat
> training parameters:
> model_class: <class '__main__.MGAN'>
> dataset_file: {'train': './datasets/semeval14/Restaurants_Train.xml.seg', 'test': './datasets/semeval14/Restaurants_Test_Gold.xml.seg'}
> inputs_cols: ['text_raw_indices', 'aspect_indices', 'text_left_indices']
> initializer: <function xavier_uniform_ at 0x1a18039ae8>
> optimizer: <class 'torch.optim.adam.Adam'>
> model_name: mgan
> dataset: restaurant
> learning_rate: 0.001
> num_epoch: 20
> dropout: 0.1
> l2reg: 0.01
> batch_size: 64
> log_step: 5
> embed_dim: 300
> hidden_dim: 300
> max_seq_len: 80
> polarities_dim: 3
> valset_ratio: 0
> device: cpu
                                                                                                    
epoch: 0
loss: 1.0925, acc: 0.3688
loss: 1.0113, acc: 0.5000
loss: 0.9940, acc: 0.5417
loss: 0.9912, acc: 0.5508
loss: 0.9720, acc: 0.5675
loss: 0.9661, acc: 0.5693
loss:

loss: 0.5421, acc: 0.7891
loss: 0.5601, acc: 0.7776
loss: 0.5712, acc: 0.7778
loss: 0.5583, acc: 0.7779
loss: 0.5629, acc: 0.7734
loss: 0.5628, acc: 0.7670
loss: 0.5656, acc: 0.7681
loss: 0.5636, acc: 0.7703
loss: 0.5610, acc: 0.7708
loss: 0.5586, acc: 0.7712
> val_acc: 0.7518, val_f1: 0.5932
                                                                                                    
epoch: 17
loss: 0.6011, acc: 0.7500
loss: 0.5158, acc: 0.8047
loss: 0.5100, acc: 0.8040
loss: 0.5116, acc: 0.8066
loss: 0.5211, acc: 0.7969
loss: 0.5372, acc: 0.7903
loss: 0.5402, acc: 0.7878
loss: 0.5390, acc: 0.7891
loss: 0.5395, acc: 0.7862
loss: 0.5405, acc: 0.7829
loss: 0.5431, acc: 0.7819
loss: 0.5500, acc: 0.7799
> val_acc: 0.7696, val_f1: 0.6512
                                                                                                    
epoch: 18
loss: 0.5196, acc: 0.7695
loss: 0.5191, acc: 0.7778
loss: 0.5310, acc: 0.7723
loss: 0.5287, acc: 0.7755
loss: 0.5358, acc: 0.7754
loss: 0.

## MGAN - twitter dataset

In [12]:
model_name = 'mgan'
dataset = 'twitter' # twitter, laptop， restaurant
optimizer = 'adam'
initializer = 'xavier_uniform_'
learning_rate = 1e-3

log_file = '{}-{}-{}.log'.format(model_name, dataset, strftime("%y%m%d-%H%M", localtime()))
logger.addHandler(logging.FileHandler(log_file))

opt_mgan = Parameter(model_classes[model_name], dataset_files[dataset], input_colses[model_name], 
              initializers[initializer], optimizers[optimizer], model_name, dataset, learning_rate, 20)

ins = Instructor(opt_mgan)
ins.run()

loading tokenizer: twitter_tokenizer.dat
loading embedding_matrix: 300_twitter_embedding_matrix.dat
> training parameters:
> model_class: <class '__main__.MGAN'>
> dataset_file: {'train': './datasets/acl-14-short-data/train.raw', 'test': './datasets/acl-14-short-data/test.raw'}
> inputs_cols: ['text_raw_indices', 'aspect_indices', 'text_left_indices']
> initializer: <function xavier_uniform_ at 0x1a18039ae8>
> optimizer: <class 'torch.optim.adam.Adam'>
> model_name: mgan
> dataset: twitter
> learning_rate: 0.001
> num_epoch: 20
> dropout: 0.1
> l2reg: 0.01
> batch_size: 64
> log_step: 5
> embed_dim: 300
> hidden_dim: 300
> max_seq_len: 80
> polarities_dim: 3
> valset_ratio: 0
> device: cpu
                                                                                                    
epoch: 0
loss: 1.0381, acc: 0.4813
loss: 1.0391, acc: 0.4594
loss: 1.0097, acc: 0.4833
loss: 1.0068, acc: 0.4891
loss: 0.9899, acc: 0.5125
loss: 0.9928, acc: 0.5109
loss: 0.9934, acc: 0.5147
loss: 0.9

epoch: 11
loss: 0.6487, acc: 0.7656
loss: 0.6975, acc: 0.7054
loss: 0.6927, acc: 0.7148
loss: 0.6784, acc: 0.7188
loss: 0.6742, acc: 0.7173
loss: 0.6766, acc: 0.7170
loss: 0.6723, acc: 0.7202
loss: 0.6770, acc: 0.7158
loss: 0.6747, acc: 0.7143
loss: 0.6735, acc: 0.7158
loss: 0.6802, acc: 0.7142
loss: 0.6821, acc: 0.7138
loss: 0.6787, acc: 0.7157
loss: 0.6806, acc: 0.7155
loss: 0.6796, acc: 0.7161
loss: 0.6791, acc: 0.7165
loss: 0.6777, acc: 0.7167
loss: 0.6779, acc: 0.7161
loss: 0.6748, acc: 0.7171
loss: 0.6773, acc: 0.7139
> val_acc: 0.6777, val_f1: 0.6493
                                                                                                    
epoch: 12
loss: 0.6487, acc: 0.7227
loss: 0.6251, acc: 0.7483
loss: 0.6394, acc: 0.7366
loss: 0.6560, acc: 0.7278
loss: 0.6558, acc: 0.7220
loss: 0.6571, acc: 0.7198
loss: 0.6518, acc: 0.7275
loss: 0.6546, acc: 0.7276
loss: 0.6541, acc: 0.7280
loss: 0.6556, acc: 0.7267
loss: 0.6566, acc: 0.7251
loss: 0.6585, acc: 0.7222
loss: 0.6555,