In [11]:
import argparse
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"]='0'
import torch
import util
import random
import pickle
import glob
import torch.nn.functional as F
from torch_geometric.nn import GATConv

import time


In [12]:
# ---- Task definition -------------------------------------------------------
binary = False                      # True -> 2 sentiment classes, False -> 3
num_classes = 2 if binary else 3

# ---- Data / resources ------------------------------------------------------
folder = 'split_dataset/laptop_mask'
# Pretrained embedding text files; each one contributes 300 columns.
# (A GoogleNews word2vec text file was previously listed here as well.)
embed_files = ['/home/binxuanh/resources/embeddings/glove.42B.300d.txt']
embed_trainable = False             # keep the pretrained vectors frozen
max_aspect_length = 10              # aspect phrases are padded/truncated to this many tokens

# ---- Graph construction ----------------------------------------------------
bidirection = True                  # add the reverse of every dependency edge
add_self = True

# ---- Model -----------------------------------------------------------------
hidden_size = 300
layer = 3                           # number of GAT layers
heads = 6                           # attention heads per GAT layer
dropout = 0.7                       # dropout on the input word vectors
att_dropout = 0                     # dropout inside the attention layers

# ---- Optimisation ----------------------------------------------------------
lr = 0.001
l2 = 1e-4                           # weight decay
epochs = 50
batch_size = 32
shuffle = True                      # reshuffle the training set every epoch
label_smooth = False
eps = 0                             # smoothing mass used when label_smooth is on

In [13]:
# Read the training split. util.readACL is a project helper that is not shown
# here; it is unpacked into three values used as texts, targets and labels
# (presumably parallel lists -- confirm against util).
train_texts,train_targets,train_labels = util.readACL(os.path.join(folder,'restaurants_train.txt'),binary)

# Read the dev split and fold it into the training data, so the model is
# trained on train + dev.
dev_texts,dev_targets,dev_labels = util.readACL(os.path.join(folder,'restaurants_dev.txt'),binary)#Restaurants
train_texts+= dev_texts
train_targets += dev_targets
train_labels += dev_labels



# NOTE: the dev_* names are rebound to the *test* file here, and the test_*
# lists are left empty. Everything later reported as "dev"/"val" accuracy is
# therefore measured on restaurants_test.txt.
# NOTE(review): `folder` points at 'split_dataset/laptop_mask' while the file
# names say "restaurants" -- confirm this pairing is intended.
dev_texts,dev_targets,dev_labels = util.readACL(os.path.join(folder,'restaurants_test.txt'),binary)#Restaurants
test_texts, test_targets, test_labels = [],[],[]

def get_dep(filename):
    """Parse a dependency file into one edge list per sentence.

    Each non-blank line is expected to look like ``relation(src-i, tgt-j)``;
    sentences are separated by blank lines.

    Args:
        filename: path of the dependency file to read.

    Returns:
        A list with one entry per sentence. Each entry is a list of edges
        ``[src_word, tgt_word, relation, src_index, tgt_index]`` where the
        indices are the integers found after the last ``-`` of each token.

    Raises:
        IndexError / ValueError: if a non-blank line does not follow the
            ``relation(src-i, tgt-j)`` layout.
    """
    nets = []
    net = []
    # `with` guarantees the handle is closed even if a line fails to parse.
    with open(filename,'r') as fin:
        for line in fin:
            # A blank (or whitespace-only) line terminates the current sentence.
            if not line.strip():
                nets.append(net)
                net = []
                continue
            # Turn "rel(a-1, b-2)" into "rel|a-1|b-2|" and split on the bars.
            e = line.replace('(','|').replace(')','|').replace(', ','|').split('|')
            relation = e[0]
            # Words may contain '-' themselves, so only the last '-' separates
            # the word from its index.
            src_parts = e[1].split('-')
            tgt_parts = e[2].split('-')
            src = '-'.join(src_parts[0:-1]).strip()
            src_index = int(src_parts[-1])
            tgt = '-'.join(tgt_parts[0:-1]).strip()
            tgt_index = int(tgt_parts[-1])
            net.append([src,tgt,relation,src_index,tgt_index])
    # Keep the final sentence when the file does not end with a blank line;
    # otherwise it would be silently dropped.
    if net:
        nets.append(net)
    return nets

# Dependency parses for the training split, one edge list per sentence.
train_deps = get_dep(os.path.join(folder,'rest_train_text_mask.txt.dep'))

# Parses of the dev split are appended to the training parses, mirroring how
# the dev texts/labels are merged into the training lists.
dev_deps = get_dep(os.path.join(folder,'rest_dev_text_mask.txt.dep'))
train_deps += dev_deps
# NOTE: dev_deps is rebound to the parses of the *test* file; test_deps stays empty.
dev_deps = get_dep(os.path.join(folder,'rest_test_text_mask.txt.dep'))
test_deps = []
# Global word -> id table; id 0 is reserved for '_UNKNOWN_'.
# get_target_pos adds entries to this dict as it sees words.
vocab = {'_UNKNOWN_':0}

def get_target_pos(deps,targets):
    """Rebuild token lists from dependency edges and locate the target marker.

    For every sentence the tokens are recovered from the (index, word) pairs
    carried by its edges. Every token, and every whitespace-separated word of
    the sentence's target phrase, is registered in the module-level ``vocab``
    as a side effect.

    Args:
        deps: per-sentence edge lists as produced by ``get_dep``.
        targets: per-sentence target phrases (strings), aligned with ``deps``.

    Returns:
        ``(target_pos, texts)``: the 0-based position of the token
        ``'TargetTarget'`` in each rebuilt sentence, and the rebuilt token
        lists themselves.

    Raises:
        ValueError: if a sentence does not contain ``'TargetTarget'``.
        KeyError: if a token index in ``1..len-1`` is missing from the edges.
    """
    positions = []
    sentences = []
    for sent_idx, target in enumerate(targets):
        # Collect word-by-index from both endpoints of every edge.
        index2word = {}
        for src_word, tgt_word, _relation, src_idx, tgt_idx in deps[sent_idx]:
            index2word[src_idx] = src_word
            index2word[tgt_idx] = tgt_word
        # Indices 1..len-1 are read back in order (index 0 is skipped).
        tokens = [index2word[k] for k in range(1, len(index2word))]
        sentences.append(tokens)
        # Register sentence tokens first, then the words of the target phrase.
        for word in tokens + target.split():
            vocab.setdefault(word, len(vocab))
        positions.append(tokens.index('TargetTarget'))

    return positions, sentences
# Replace the raw texts with token lists rebuilt from the dependency parses and
# record where the 'TargetTarget' marker sits in each sentence. These calls
# also populate the global `vocab`.
train_pos,train_texts = get_target_pos(train_deps,train_targets)
# dev_* holds the test-file data at this point (see the loading cell).
dev_pos,dev_texts = get_target_pos(dev_deps,dev_targets)
# test_deps / test_targets are empty lists, so this yields two empty lists.
test_pos,test_texts = get_target_pos(test_deps,test_targets)

In [14]:
# Build (or load from cache) one embedding matrix per pretrained file and
# concatenate them column-wise into `emb`.
embs = []
for embed_file in embed_files:
    base_emb = os.path.basename(embed_file)
    # np.save appends '.npy' to the name it is given, so this is the cache path.
    cache_path = os.path.join(folder,base_emb)+'.npy'
    if os.path.exists(cache_path):
        # NOTE(review): the cached matrix is used as-is; nothing checks that its
        # row count still matches the current `vocab`. Delete the cache when the
        # data or vocabulary changes.
        emb = np.load(cache_path)
        #vocab = pickle.load(open(os.path.join(folder,base_emb)+'.vocab', 'rb'))
    else:
        # Words without a pretrained vector keep a small random vector;
        # row 0 (the '_UNKNOWN_' id, also used as padding) is all zeros.
        emb = np.random.uniform(-0.01,0.01,[len(vocab), 300])
        emb[0] = np.zeros(300)
        # Explicit encoding so reading does not depend on the machine's locale.
        with open(embed_file,'r',encoding='utf-8') as fin:
            for line in fin:
                e = line.strip().split(' ')
                # NOTE(review): the lookup key is lower-cased, but vocab keys are
                # stored exactly as they appear in the parsed text, so any vocab
                # entry containing upper-case letters never receives a
                # pretrained vector. Confirm this is intended.
                if e[0].lower() in vocab:
                    emb[vocab[e[0].lower()]] = np.array(e[1:],dtype=float)
        np.save(os.path.join(folder,base_emb),emb)
        # Close the vocab file deterministically so the pickle is fully flushed.
        with open(os.path.join(folder,base_emb+'.vocab'), 'wb') as vocab_out:
            pickle.dump(vocab, vocab_out)
    embs.append(emb)
emb = np.concatenate(embs,1)

FileNotFoundError: [Errno 2] No such file or directory: '/home/binxuanh/resources/embeddings/glove.42B.300d.txt'

In [5]:
# Wrap the pretrained matrix in an embedding layer; the weights receive
# gradients only when `embed_trainable` is set.
word_embed = torch.nn.Embedding.from_pretrained(
    torch.Tensor(emb),
    freeze=not embed_trainable,
)


In [6]:
def transform(text,dep,pos,aspect):
    """Convert one sentence into index form for batching.

    Args:
        text: list of tokens of the sentence.
        dep: edge list of the sentence; each edge is
            ``[src_word, tgt_word, relation, src_index, tgt_index]`` with
            1-based token indices.
        pos: position of the target node in ``text``; returned unchanged.
        aspect: the aspect phrase as a whitespace-separated string.

    Returns:
        ``(word_index, net, target_node, aspect_index, aspect_length)``:
        vocabulary ids of the tokens; the 0-based edge list (``root`` edges
        dropped, reverse edges added when ``bidirection`` is set); the target
        position; aspect ids padded with 0 / truncated to exactly
        ``max_aspect_length`` entries; and the number of real (non-padding)
        aspect ids, never less than 1.

    Raises:
        KeyError: if a token or aspect word is missing from ``vocab``.
    """
    net = []
    for edge in dep:
        # The artificial root edge has no real source token.
        if edge[2]=='root':
            continue
        net.append([edge[3]-1,edge[4]-1])
        if bidirection:
            net.append([edge[4]-1,edge[3]-1])
    word_index = [vocab[w] for w in text]

    aspect_tokens = aspect.split()
    aspect_index = [vocab[w] for w in aspect_tokens][:max_aspect_length]
    aspect_index = aspect_index + [0]*(max_aspect_length-len(aspect_index))
    # Callers divide the summed aspect vectors by this length. It must count
    # only the ids actually kept after truncation, and must not be 0 (an empty
    # aspect would otherwise produce 0/0 = NaN downstream).
    aspect_length = max(1, min(len(aspect_tokens), max_aspect_length))
    target_node = pos
    return word_index,net,target_node,aspect_index,aspect_length

def get_batch(texts,deps,pos,aspects):
    """Merge several sentences into one disjoint graph batch.

    Every sentence is converted with ``transform``; its edge endpoints and
    target position are shifted by the number of tokens that precede it in
    the batch, so all sentences share one flat node numbering.

    Args:
        texts: list of token lists.
        deps: list of edge lists, aligned with ``texts``.
        pos: list of target positions, aligned with ``texts``.
        aspects: list of aspect phrases, aligned with ``texts``.

    Returns:
        ``(word_indices, edges, target_nodes, aspect_indices, aspect_lengths)``
        where ``edges`` is an integer array of shape ``(num_edges, 2)``.
        Sentences that contribute no edge, and an empty batch, are supported
        and simply add no rows to ``edges``.
    """
    word_indices = []
    nets = []
    target_nodes = []
    aspect_indices = []
    aspect_lengths = []
    for i in range(len(texts)):
        word_index,net,target_node,aspect_index,aspect_length= transform(texts[i],deps[i],pos[i],aspects[i])
        offset = len(word_indices)
        # reshape(-1, 2) keeps an edge-less sentence as a (0, 2) integer array,
        # so it can be concatenated with the other sentences' edges.
        nets.append(np.asarray(net,dtype=np.int64).reshape(-1,2)+offset)
        target_nodes.append(target_node+offset)
        word_indices += word_index

        aspect_indices += [aspect_index]
        aspect_lengths += [aspect_length]

    if nets:
        edges = np.concatenate(nets,0)
    else:
        # np.concatenate rejects an empty list; return an empty edge array.
        edges = np.zeros((0,2),dtype=np.int64)
    return word_indices, edges,target_nodes,aspect_indices,aspect_lengths



In [7]:
import math
def glorot(tensor):
    """Fill ``tensor`` in place with Glorot/Xavier-uniform values.

    Values are drawn uniformly from ``[-b, b]`` with
    ``b = sqrt(6 / (size(-2) + size(-1)))``.

    Args:
        tensor: a tensor with at least two dimensions, or ``None``; ``None``
            is ignored.

    Returns:
        None. The tensor is modified in place.
    """
    # The None guard has to come before any attribute access on `tensor`.
    if tensor is None:
        return
    stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
    tensor.data.uniform_(-stdv, stdv)


class Net(torch.nn.Module):
    """Stack of GAT layers whose outputs are threaded through one shared LSTM.

    The input node features first pass through the LSTM once. After that, each
    of the ``layer`` GAT convolutions is followed by another call to the same
    LSTM, which receives the previous hidden/cell state. A final linear layer
    maps every node's hidden state to ``num_classes`` scores.

    Sizes and dropout rates come from the module-level settings
    (``hidden_size``, ``heads``, ``layer``, ``dropout``, ``att_dropout``,
    ``num_classes``).
    """

    def __init__(self):
        super().__init__()

        per_head = hidden_size // heads
        self.convs = torch.nn.ModuleList([
            GATConv(hidden_size, per_head, heads=heads, dropout=att_dropout)
            for _ in range(layer)
        ])
        self.lin3 = torch.nn.Linear(hidden_size, num_classes)
        self.rnn = torch.nn.LSTM(hidden_size, hidden_size, 1)
        # Re-initialise the classifier weights with Glorot-uniform values.
        glorot(self.lin3.weight)

    def forward(self, x, edge_index):
        """Return per-node class scores.

        Args:
            x: node feature matrix (one row per node -- presumably
                ``hidden_size`` columns; confirm against the caller).
            edge_index: edge tensor passed straight to each ``GATConv``.
        """
        if dropout > 0:
            x = F.dropout(x, p=dropout, training=self.training)

        # A leading singleton dimension is added before every LSTM call and
        # squeezed away from the returned hidden state afterwards.
        _, state = self.rnn(x.unsqueeze(0))
        x = state[0].squeeze(0)

        for depth in range(layer):
            attended = F.elu(self.convs[depth](x, edge_index))
            _, state = self.rnn(attended.unsqueeze(0), state)
            x = state[0].squeeze(0)

        return self.lin3(x)
'''
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        
        #self.lin1 = torch.nn.Linear(hidden_size, hidden_size)
        self.convs = torch.nn.ModuleList()
        for i in range(layer):            
            self.convs.append(GATConv(hidden_size, hidden_size//heads, heads=heads))
        self.lin3 = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, x, edge_index):
        if dropout>0:
            x = F.dropout(x,p=dropout, training=self.training)
        for i in range(layer):
            x = F.elu(self.convs[i](x, edge_index))
        x = self.lin3(x)
        return x
'''

'\nclass Net(torch.nn.Module):\n    def __init__(self):\n        super(Net, self).__init__()\n        \n        #self.lin1 = torch.nn.Linear(hidden_size, hidden_size)\n        self.convs = torch.nn.ModuleList()\n        for i in range(layer):            \n            self.convs.append(GATConv(hidden_size, hidden_size//heads, heads=heads))\n        self.lin3 = torch.nn.Linear(hidden_size, num_classes)\n\n    def forward(self, x, edge_index):\n        if dropout>0:\n            x = F.dropout(x,p=dropout, training=self.training)\n        for i in range(layer):\n            x = F.elu(self.convs[i](x, edge_index))\n        x = self.lin3(x)\n        return x\n'

In [8]:

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Instantiate the network and move it and the embedding table to that device.
model = Net().to(device)
word_embed.to(device)

# Cross-entropy over the class scores of the target nodes.
loss_op = torch.nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(list(model.parameters())+list(word_embed.parameters()), lr=lr)


In [None]:

def train(train_texts,train_targets,train_labels,train_deps,train_pos):
    """Run one training epoch and return the mean loss per example.

    Relies on the module-level ``model``, ``optimizer``, ``word_embed``,
    ``loss_op`` and ``device``, and on the settings ``shuffle``,
    ``batch_size``, ``label_smooth`` and ``eps``.

    Args:
        train_texts: list of token lists.
        train_targets: list of aspect phrases.
        train_labels: list of numeric labels; 1 is added to each before it is
            used as a class index.
        train_deps: list of dependency edge lists.
        train_pos: list of target-node positions.

    Returns:
        The sum of per-batch losses weighted by batch size, divided by the
        number of training examples.
    """
    model.train()
    if shuffle:
        # Shuffle all five parallel lists with one shared permutation.
        c = list(zip(train_texts,train_targets,train_labels,train_deps,train_pos))
        np.random.shuffle(c)
        train_texts,train_targets,train_labels,train_deps,train_pos = map(list,zip(*c))
    total_loss = 0
    for i in range(0,len(train_texts),batch_size):
        batch = slice(i,i+batch_size)

        word_index,net,target_node,aspect_index,aspect_length = get_batch(train_texts[batch],train_deps[batch],train_pos[batch],train_targets[batch])

        x = torch.tensor(data=np.array(word_index),dtype=torch.long )

        x_aspect = torch.tensor(data=np.array(aspect_index),dtype=torch.long )
        aspect_length = torch.tensor(data=aspect_length,dtype=torch.float ).view([-1,1])

        # Mean embedding of each aspect phrase (padding ids use row 0).
        aspect_vectors = word_embed(x_aspect.to(device)).sum(1)/aspect_length.to(device)

        # Replace the embedding of every target node by its aspect vector.
        word_vectors = word_embed(x.to(device))
        word_vectors[target_node] = aspect_vectors
        net_tensor = torch.tensor(np.array(net).transpose())
        target_node_tensor = torch.tensor(target_node,dtype=torch.long)
        y_tensor = torch.tensor(np.array(train_labels[batch])+1,dtype=torch.long).to(device)

        torch.cuda.empty_cache()
        optimizer.zero_grad()

        # Only the rows of the target nodes are classified.
        target_rep = model(word_vectors, net_tensor.to(device))[target_node_tensor]
        if label_smooth:
            # Take the class count from the logits instead of hard-coding 3,
            # so smoothing stays correct when `binary` selects 2 classes.
            n_class = target_rep.size(1)

            one_hot = torch.zeros_like(target_rep).scatter(1, y_tensor.view(-1, 1), 1)
            one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
            log_prb = F.log_softmax(target_rep, dim=1)
            loss = -(one_hot * log_prb).sum(dim=1).mean()
        else:
            loss = loss_op(target_rep, y_tensor)
        batch_loss = loss.item()

        loss.backward()
        optimizer.step()
        # Weight by batch size so the final average is per example.
        total_loss +=  batch_loss*len(target_node)

    return total_loss/len(train_texts)


def test(texts,targets,labels,deps,pos):
    """Return the classification accuracy of ``model`` on the given examples.

    Relies on the module-level ``model``, ``word_embed``, ``device`` and
    ``batch_size``. No gradients are computed.

    Args:
        texts: list of token lists.
        targets: list of aspect phrases.
        labels: list of numeric labels; 1 is added to each before comparing
            with the predicted class index.
        deps: list of dependency edge lists.
        pos: list of target-node positions.

    Returns:
        Number of correct predictions divided by ``len(texts)``.
    """
    model.eval()
    with torch.no_grad():
        total_acc = 0
        for i in range(0,len(texts),batch_size):
            batch = slice(i,i+batch_size)
            word_index,net,target_node,aspect_index,aspect_length = get_batch(texts[batch],deps[batch],pos[batch],targets[batch])
            x = torch.tensor(data=np.array(word_index),dtype=torch.long )
            x_aspect = torch.tensor(data=np.array(aspect_index),dtype=torch.long )
            aspect_length = torch.tensor(data=aspect_length,dtype=torch.float ).view([-1,1])

            # Mean embedding of each aspect phrase (padding ids use row 0).
            aspect_vectors = word_embed(x_aspect.to(device)).sum(1)/aspect_length.to(device)

            # Replace the embedding of every target node by its aspect vector.
            word_vectors = word_embed(x.to(device))
            word_vectors[target_node] = aspect_vectors
            net_tensor = torch.tensor(np.array(net).transpose())
            target_node_tensor = torch.tensor(target_node,dtype=torch.long)

            # Only the rows of the target nodes are classified.
            target_rep = model(word_vectors, net_tensor.to(device))[target_node_tensor]
            pred =  target_rep.max(1)[1].cpu().numpy()
            acc = (pred == np.array(labels[batch])+1)

            total_acc += acc.sum()
    return total_acc / len(texts)

# Accuracy after every epoch, appended to by the training loops below.
val_acc = []
# Seeded with 0 so test_acc[-1] can always be printed; nothing in the active
# training loops appends to it.
test_acc = [0]


In [None]:
# First training phase: Adam with weight decay. `optimizer` is a module-level
# name that train() picks up when it is called.
optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=l2)

for epoch in range(epochs):
    loss = train(train_texts,train_targets,train_labels,train_deps,train_pos)
    # NOTE: dev_* was loaded from the test file, so this "validation" accuracy
    # is measured on the test split.
    val_acc.append(test(dev_texts,dev_targets,dev_labels,dev_deps,dev_pos))

    #if len(val_acc)>tolorence and val_acc[-1]<val_acc[-tolorence]+0.0001:
    #    break
    # test_acc is never appended to here, so Test_Acc always prints 0.0000.
    print('Epoch: {:02d}, Loss: {:.4f}, Acc: {:.4f}, Test_Acc: {:.4f}'.format(epoch, loss, val_acc[-1], test_acc[-1]))
    # Stop this phase as soon as the mean training loss drops below 0.5.
    if loss<0.5:
        break
'''
optimizer = torch.optim.Adam(model.parameters(), lr=lr/10)
print('change learning rate')
for epoch in range(epochs):
    loss = train()
    val_acc.append(test(val_loader))
    if val_acc[-1] >= max(val_acc):
        test_acc.append(test(test_loader))
    print('Epoch: {:02d}, Loss: {:.4f}, Acc: {:.4f}, Test_Acc: {:.4f}'.format(epoch, loss, val_acc[-1], test_acc[-1]))
'''
# Best per-epoch accuracy seen so far and the last test_acc entry (still the
# initial 0 unless something else appended to it).
print(max(val_acc),test_acc[-1])


Epoch: 00, Loss: 0.9974, Acc: 0.5486, Test_Acc: 0.0000
Epoch: 01, Loss: 0.8672, Acc: 0.6285, Test_Acc: 0.0000
Epoch: 02, Loss: 0.8036, Acc: 0.6505, Test_Acc: 0.0000
Epoch: 03, Loss: 0.7973, Acc: 0.6991, Test_Acc: 0.0000
Epoch: 04, Loss: 0.7597, Acc: 0.7116, Test_Acc: 0.0000
Epoch: 05, Loss: 0.7421, Acc: 0.6959, Test_Acc: 0.0000
Epoch: 06, Loss: 0.7135, Acc: 0.7038, Test_Acc: 0.0000
Epoch: 07, Loss: 0.7120, Acc: 0.6834, Test_Acc: 0.0000
Epoch: 08, Loss: 0.6906, Acc: 0.7210, Test_Acc: 0.0000
Epoch: 09, Loss: 0.6859, Acc: 0.7351, Test_Acc: 0.0000
Epoch: 10, Loss: 0.6690, Acc: 0.7038, Test_Acc: 0.0000
Epoch: 11, Loss: 0.6454, Acc: 0.6991, Test_Acc: 0.0000
Epoch: 12, Loss: 0.6347, Acc: 0.7085, Test_Acc: 0.0000
Epoch: 13, Loss: 0.6391, Acc: 0.7226, Test_Acc: 0.0000
Epoch: 14, Loss: 0.6296, Acc: 0.7116, Test_Acc: 0.0000
Epoch: 15, Loss: 0.6299, Acc: 0.7476, Test_Acc: 0.0000
Epoch: 16, Loss: 0.6040, Acc: 0.7069, Test_Acc: 0.0000
Epoch: 17, Loss: 0.5930, Acc: 0.7257, Test_Acc: 0.0000
Epoch: 18,

In [None]:
# Second training phase: keep training the same `model` with plain SGD.
# Rebinding the module-level `optimizer` is what switches train() over.
optimizer = torch.optim.SGD(model.parameters(), lr=lr,weight_decay=l2)
for epoch in range(epochs):
    loss = train(train_texts,train_targets,train_labels,train_deps,train_pos)
    # val_acc keeps growing across both phases; dev_* is the test split.
    val_acc.append(test(dev_texts,dev_targets,dev_labels,dev_deps,dev_pos))
    
    #if len(val_acc)>tolorence and val_acc[-1]<val_acc[-tolorence]+0.0001:
    #    break
    # test_acc is never appended to here, so Test_Acc always prints 0.0000.
    print('Epoch: {:02d}, Loss: {:.4f}, Acc: {:.4f}, Test_Acc: {:.4f}'.format(epoch, loss, val_acc[-1], test_acc[-1]))
    

Epoch: 00, Loss: 0.4666, Acc: 0.7288, Test_Acc: 0.0000
Epoch: 01, Loss: 0.4514, Acc: 0.7241, Test_Acc: 0.0000
Epoch: 02, Loss: 0.4650, Acc: 0.7210, Test_Acc: 0.0000
Epoch: 03, Loss: 0.4520, Acc: 0.7226, Test_Acc: 0.0000
Epoch: 04, Loss: 0.4482, Acc: 0.7241, Test_Acc: 0.0000
Epoch: 05, Loss: 0.4439, Acc: 0.7257, Test_Acc: 0.0000
Epoch: 06, Loss: 0.4537, Acc: 0.7273, Test_Acc: 0.0000
Epoch: 07, Loss: 0.4487, Acc: 0.7226, Test_Acc: 0.0000
Epoch: 08, Loss: 0.4384, Acc: 0.7226, Test_Acc: 0.0000
Epoch: 09, Loss: 0.4327, Acc: 0.7241, Test_Acc: 0.0000
Epoch: 10, Loss: 0.4417, Acc: 0.7257, Test_Acc: 0.0000
Epoch: 11, Loss: 0.4334, Acc: 0.7257, Test_Acc: 0.0000
Epoch: 12, Loss: 0.4321, Acc: 0.7273, Test_Acc: 0.0000
Epoch: 13, Loss: 0.4268, Acc: 0.7273, Test_Acc: 0.0000
Epoch: 14, Loss: 0.4337, Acc: 0.7273, Test_Acc: 0.0000
Epoch: 15, Loss: 0.4405, Acc: 0.7257, Test_Acc: 0.0000
Epoch: 16, Loss: 0.4472, Acc: 0.7257, Test_Acc: 0.0000
Epoch: 17, Loss: 0.4223, Acc: 0.7241, Test_Acc: 0.0000
Epoch: 18,