In [1]:
import sys
sys.path.append('../')

In [2]:
import os
import pickle
import torch
from pytorch_nlp_models.text_pair.siamese_rnn import SiameseGRU
from utils.datasets import LCQMCDataset

from torch.utils.data import DataLoader

In [3]:
DATA_PATH = '../data/'
LCQMC_PATH = os.path.join(DATA_PATH, 'LCQMC')
WORD_VECTORS_PATH = os.path.join(DATA_PATH, 'word_vectors')
BAIDUBAIKE_PKL = os.path.join(WORD_VECTORS_PATH, 'baidubaike.pkl')

MAX_SEQ_LEN = 40

In [4]:
with open(BAIDUBAIKE_PKL, 'rb') as f:
    wvs = pickle.load(f)

In [5]:
iw = wvs['iw']
wi = wvs['wi']
dim = wvs['dim']
emb = wvs['emb']

# 普通初始化

In [6]:
rnn = SiameseGRU(vocab_size=len(iw), emb_dim=dim)
rnn.eval()

  "num_layers={}".format(dropout, num_layers))


SiameseGRU(
  (emb): Embedding(635976, 300, padding_idx=0)
  (rnn): GRU(300, 300, batch_first=True, dropout=0.5, bidirectional=True)
  (fc): Linear(in_features=1800, out_features=2, bias=True)
)

# 单样本前向

In [7]:
dataset = LCQMCDataset(LCQMC_PATH, MAX_SEQ_LEN, wi, charmode = True)

In [8]:
dataset.to('dev')
sample = dataset[1]
ids1 = sample['ids1'].view(1, -1)
ids2 = sample['ids2'].view(1, -1)
len1 = sample['len1'].view(1)
len2 = sample['len2'].view(1)

In [9]:
dataset[1]

{'ids1': tensor([ 1600,  2112,   722,   131,     9,  2459, 14299,   986,   855,  1768,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0]),
 'ids2': tensor([  799,  2468,    67,  2519,  1678,  1600,  5771,    73,   128,  7964,
             9, 10388, 11774,  2320,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0]),
 'len1': tensor(10),
 'len2': tensor(14),
 'label': tensor(0.)}

In [10]:
len1.shape

torch.Size([1])

In [11]:
logits, out1, out2 = rnn(ids1, ids2, len1, len2)

In [12]:
logits

tensor([[0.1109, 0.0897]], grad_fn=<AddmmBackward>)

In [13]:
torch.softmax(logits, dim = 1)

tensor([[0.5053, 0.4947]], grad_fn=<SoftmaxBackward>)

In [14]:
out1.shape, out2.shape

(torch.Size([1, 600]), torch.Size([1, 600]))

# Batch前向
## Batch

In [15]:
batch_size = 10
data_loader = DataLoader(dataset, batch_size=batch_size)
data_iter = iter(data_loader)

In [16]:
batch = next(data_iter)

In [17]:
batch['ids1'].shape, batch['len1'].shape

(torch.Size([10, 40]), torch.Size([10]))

In [18]:
batch['label']

tensor([1., 0., 1., 0., 1., 0., 0., 1., 1., 1.])

In [19]:
logits, out1, out2 = rnn(batch['ids1'], batch['ids2'], batch['len1'], batch['len2'])

In [20]:
logits

tensor([[ 0.1567,  0.2015],
        [ 0.1109,  0.0897],
        [-0.0912,  0.1984],
        [ 0.0875,  0.0019],
        [ 0.0724, -0.0284],
        [-0.0634,  0.4135],
        [-0.1382,  0.1250],
        [ 0.1263,  0.0430],
        [-0.0361,  0.0125],
        [-0.0380, -0.0245]], grad_fn=<AddmmBackward>)

In [21]:
probs = torch.softmax(logits, dim = 1)
probs

tensor([[0.4888, 0.5112],
        [0.5053, 0.4947],
        [0.4281, 0.5719],
        [0.5214, 0.4786],
        [0.5252, 0.4748],
        [0.3830, 0.6170],
        [0.4346, 0.5654],
        [0.5208, 0.4792],
        [0.4879, 0.5121],
        [0.4966, 0.5034]], grad_fn=<SoftmaxBackward>)

In [22]:
torch.max(probs, dim = 1)

torch.return_types.max(
values=tensor([0.5112, 0.5053, 0.5719, 0.5214, 0.5252, 0.6170, 0.5654, 0.5208, 0.5121,
        0.5034], grad_fn=<MaxBackward0>),
indices=tensor([1, 0, 1, 0, 0, 1, 1, 0, 1, 1]))

## One by one

In [23]:
batch_ids1 = batch['ids1']
batch_ids2 = batch['ids2']
batch_len1 = batch['len1']
batch_len2 = batch['len2']
_logits_list = []
for i in range(batch_size):
    _ids1 = batch_ids1[i].view(1, -1)
    _ids2 = batch_ids2[i].view(1, -1)
    _len1 = batch_len1[i].view(1)
    _len2 = batch_len2[i].view(1)
    _lg, _, _ = rnn(_ids1, _ids2, _len1, _len2)
    _logits_list.append(_lg)

In [24]:
_logits = torch.cat(_logits_list, dim = 0)
_logits

tensor([[ 0.1567,  0.2015],
        [ 0.1109,  0.0897],
        [-0.0912,  0.1984],
        [ 0.0875,  0.0019],
        [ 0.0724, -0.0284],
        [-0.0634,  0.4135],
        [-0.1382,  0.1250],
        [ 0.1263,  0.0430],
        [-0.0361,  0.0125],
        [-0.0380, -0.0245]], grad_fn=<CatBackward>)

In [25]:
_probs = torch.softmax(_logits, dim = 1)
_probs

tensor([[0.4888, 0.5112],
        [0.5053, 0.4947],
        [0.4281, 0.5719],
        [0.5214, 0.4786],
        [0.5252, 0.4748],
        [0.3830, 0.6170],
        [0.4346, 0.5654],
        [0.5208, 0.4792],
        [0.4879, 0.5121],
        [0.4966, 0.5034]], grad_fn=<SoftmaxBackward>)

## batch前向 vs one by one

In [26]:
_probs - probs

tensor([[ 0.0000e+00,  0.0000e+00],
        [-5.9605e-08,  2.9802e-08],
        [-2.9802e-08,  0.0000e+00],
        [ 0.0000e+00, -5.9605e-08],
        [-5.9605e-08, -2.9802e-08],
        [ 0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  2.9802e-08],
        [ 0.0000e+00,  0.0000e+00],
        [ 2.9802e-08,  0.0000e+00]], grad_fn=<SubBackward0>)

In [27]:
_logits - logits

tensor([[-2.9802e-08, -5.9605e-08],
        [-1.0431e-07,  2.2352e-08],
        [-5.2154e-08, -1.4901e-08],
        [ 3.7253e-08, -1.1176e-07],
        [-2.2352e-08,  3.7253e-09],
        [ 1.0431e-07,  5.9605e-08],
        [ 2.9802e-08, -1.4901e-08],
        [ 0.0000e+00,  2.2352e-08],
        [-1.4901e-08,  1.1176e-08],
        [-7.4506e-09, -4.0978e-08]], grad_fn=<SubBackward0>)

# 加载训练词向量

In [28]:
rnn = SiameseGRU(vocab_size=len(iw), emb_dim=dim, 
                 emb_weights=  torch.tensor(emb, dtype = torch.float32),
                 emb_static=True
                )

In [29]:
rnn.emb.state_dict()['weight']

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.1045, -0.4096,  0.0025,  ...,  0.2424,  0.5210,  0.0380],
        ...,
        [ 0.1317, -0.0819,  0.0877,  ..., -0.0862, -0.0418, -0.1139],
        [ 0.0918,  0.1966, -0.0043,  ..., -0.1252,  0.0385,  0.0049],
        [ 0.0351,  0.1157, -0.0244,  ..., -0.0970,  0.0307, -0.0839]])

In [30]:
torch.tensor(emb)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.1045, -0.4096,  0.0025,  ...,  0.2424,  0.5210,  0.0380],
        ...,
        [ 0.1317, -0.0819,  0.0877,  ..., -0.0862, -0.0418, -0.1139],
        [ 0.0918,  0.1966, -0.0043,  ..., -0.1252,  0.0385,  0.0049],
        [ 0.0351,  0.1157, -0.0244,  ..., -0.0970,  0.0307, -0.0839]])