In [1]:
import os

import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torchtext as thtext

import numpy as np
import scipy.sparse
from sklearn.metrics import f1_score, precision_score, recall_score
from tqdm import tqdm

In [2]:
cuda_device = th.device('cuda:3')
th.cuda.set_device(device=cuda_device)

base_path = '/data/blchen/text/CCKS2019-IPRE/'
save_path = os.path.join(base_path, 'preprocessed/sent')
result_path = os.path.join(base_path, 'result')
test_path = os.path.join(base_path, 'sent_relation_test.txt')

## Load data

In [3]:
X_train = np.load(os.path.join(save_path, 'X_train.npy'))
X_dev = np.load(os.path.join(save_path, 'X_dev.npy'))

y_train = scipy.sparse.load_npz(os.path.join(save_path, 'y_train.npz'))
y_dev = scipy.sparse.load_npz(os.path.join(save_path, 'y_dev.npz'))

In [4]:
X_train = th.LongTensor(X_train)
X_dev = th.LongTensor(X_dev)

y_train = th.FloatTensor(y_train.todense())
y_dev = th.FloatTensor(y_dev.todense())

In [5]:
train_dataset = th.utils.data.TensorDataset(X_train, y_train)
train_data_loader = th.utils.data.DataLoader(train_dataset, batch_size=500, shuffle=True)

dev_dataset = th.utils.data.TensorDataset(X_dev, y_dev)
dev_data_loader = th.utils.data.DataLoader(dev_dataset, batch_size=1000)

for X_train_batch, y_train_batch in train_data_loader:
    print('X_train shape', X_train_batch.shape, 'y_train shape', y_train_batch.shape)
    break
print('train_batch_num', len(train_data_loader))
for X_dev_batch, y_dev_batch in dev_data_loader:
    print('X_dev shape', X_dev_batch.shape, 'y_dev shape', y_dev_batch.shape)
    break
print('dev_batch_num', len(dev_data_loader))
print('train/dev split', len(X_train)/(len(X_train) + len(X_dev)))

X_train shape torch.Size([500, 50]) y_train shape torch.Size([500, 35])
train_batch_num 575
X_dev shape torch.Size([1000, 50]) y_dev shape torch.Size([1000, 35])
dev_batch_num 39
train/dev split 0.8820725178654748


## Network

In [6]:
class EXAM(nn.Module):
    
    def __init__(self, feature_num=50, label_num=35, hidden_size=1024, **kwargs):
        super().__init__(**kwargs)
        
        self.hidden_size = hidden_size
        
        self.embed = nn.Embedding(270734, 300, padding_idx=0)
        self.label_embed = nn.Parameter(th.Tensor(label_num, hidden_size*2))
        
        self.rnn = nn.GRU(300, hidden_size, num_layers=2, batch_first=True, bidirectional=True)
        
        self.dense_1 = nn.Linear(feature_num, feature_num*2)
        self.dense_2 = nn.Linear(feature_num*2, 1)
    

    def forward(self, x):
        embed = self.embed(x)
        encode = self.rnn(embed)[0]
        
        interaction = th.matmul(encode, self.label_embed.transpose(0, 1)).transpose(1, 2)
        
        out = F.relu(self.dense_1(interaction))
        out = self.dense_2(out).squeeze(dim=-1)
        
        return out

In [7]:
net = EXAM()
net = net.to(cuda_device)

loss = nn.BCEWithLogitsLoss()
optim = th.optim.Adam(net.parameters(), lr=3e-3)

In [31]:
net.load_state_dict(th.load('/data/blchen/text/CCKS2019-IPRE/net/sent/8.pt', 
                            map_location=lambda storage, loc: storage.cuda(3)))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])

## Evaluation

In [3]:
def precision_k(pred, label, k=[1, 3, 5]):
    batch_size = pred.shape[0]
    
    precision = []
    for _k in k:
        p = 0
        for i in range(batch_size):
            p += label[i, pred[i, :_k]].mean().item()
        precision.append(p*100/batch_size)
    
    return precision


def evaluate(result):
    p1, p3 = 0, 0
    
    with th.no_grad():
        for batch_idx, (X_batch, y_batch) in enumerate(dev_data_loader):

            _batch_size = X_batch.shape[0]
            X_batch = X_batch.cuda()
            y_batch = y_batch.cuda()

            output = net(X_batch)
            pred = output.topk(k=5)[1]

            _p1, _p3 = precision_k(pred, y_batch, k=[1, 3])
            p1 += _p1
            p3 += _p3

    
    batch_idx += 1
    p1 /= batch_idx
    p3 /= batch_idx

    result[-1].append([p1, p3])
    
    return result

In [10]:
def evaluate(result):
    f, p, r = 0, 0, 0
    
    with th.no_grad():
        for batch_idx, (X_batch, y_batch) in enumerate(dev_data_loader):

            _batch_size = X_batch.shape[0]
            X_batch = X_batch.cuda()
            y_batch = y_batch.numpy()

            output = net(X_batch).topk(k=5)[1].cpu().numpy()
            pred = np.zeros(y_batch.shape)
            pred[output] = 1

            _f = f1_score(y_batch, pred, average='micro')
            _p = precision_score(y_batch, pred, average='micro')
            _r = recall_score(y_batch, pred, average='micro')
            f += _f
            p += _p
            r += _r

    
    batch_idx += 1
    f /= batch_idx
    p /= batch_idx
    r /= batch_idx

    result[-1].append([f, p, r])
    
    return result

## Train

In [11]:
result = []

In [None]:
for e in tqdm(range(1, 31)):
    for batch_idx, (X_batch, y_batch) in enumerate(train_data_loader):
        
        X_batch = X_batch.cuda()
        y_batch = y_batch.cuda()
        
        optim.zero_grad()
        l = loss(net(X_batch), y_batch)
        l.backward()
        optim.step()
        
    result.append(['epoch', e])
    result = evaluate(result)
    
    th.save(net.state_dict(), '/data/blchen/text/CCKS2019-IPRE/net/sent/'+ str(e) + '.pt')

  3%|▎         | 1/30 [04:29<2:10:18, 269.60s/it]

In [14]:
result

[['epoch',
  1,
  [0.01505637177637929, 0.028571428571428567, 0.010302219762651423]],
 ['epoch', 2, [0.0199690638017786, 0.028571428571428567, 0.01545581995941709]],
 ['epoch',
  3,
  [0.020065036994704848, 0.028571428571428567, 0.015604439525302846]],
 ['epoch',
  4,
  [0.018357876060181636, 0.028571428571428567, 0.013614892701223643]],
 ['epoch',
  5,
  [0.015290733614168661, 0.028571428571428567, 0.010527762405460252]],
 ['epoch',
  6,
  [0.014340364086814902, 0.028571428571428567, 0.009655967533665379]],
 ['epoch',
  7,
  [0.012818879557048092, 0.028571428571428567, 0.008338068007132758]],
 ['epoch',
  8,
  [0.014720926324428727, 0.028571428571428567, 0.0099790936481584]],
 ['epoch',
  9,
  [0.013566520080581434, 0.028571428571428567, 0.008968886429318087]],
 ['epoch',
  10,
  [0.01297253910982764, 0.028571428571428567, 0.008491914160978914]],
 ['epoch',
  11,
  [0.0130308352410822, 0.028571428571428567, 0.00851755518661994]],
 ['epoch',
  12,
  [0.013945680839479424, 0.02857142857

## Prediction 

In [32]:
X_test = np.load(os.path.join(save_path, 'X_test.npy'))
X_test = th.LongTensor(X_test)

test_data_loader = th.utils.data.DataLoader(X_test, batch_size=1000)

for X_test_batch in test_data_loader:
    print('X_train shape', X_test_batch.shape)
    break
print('train_batch_num', len(test_data_loader))

X_train shape torch.Size([1000, 50])
train_batch_num 78


In [33]:
result = []

with th.no_grad():
    for batch_idx, X_batch in enumerate(test_data_loader):

        _batch_size = X_batch.shape[0]
        X_batch = X_batch.cuda()

        output = net(X_batch)
        pred = output.topk(k=1)[1]
        for p in pred.tolist():
            result.append(p[0])

In [None]:
for r in result:
    if(r != 0):
        print(r)

In [35]:
with open(test_path, 'r') as f:
    with open(os.path.join(result_path, 'result.txt'), 'w') as fw:
        cnt = 0
        for line in f:
            fw.write(line.strip() + '\t' + str(result[cnt]) + '\n')
            cnt += 1