# Compare-Aggregate 模型搭建

In [1]:
from data_load import load_char_data, get_embed
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pickle

400000
400001
将'the of to .'文本转化为id序列：1 4 5 3 id_list转化为文本为：he is eating bread 将'the of to .'文本转化为嵌入向量为：
[-7.1549e-02  9.3459e-02  2.3738e-02 -9.0339e-02  5.6123e-02  3.2547e-01
 -3.9796e-01 -9.2139e-02  6.1181e-02 -1.8950e-01  1.3061e-01  1.4349e-01
  1.1479e-02  3.8158e-01  5.4030e-01 -1.4088e-01  2.4315e-01  2.3036e-01
 -5.5339e-01  4.8154e-02  4.5662e-01  3.2338e+00  2.0199e-02  4.9019e-02
 -1.4132e-02  7.6017e-02 -1.1527e-01  2.0060e-01 -7.7657e-02  2.4328e-01
  1.6368e-01 -3.4118e-01 -6.6070e-02  1.0152e-01  3.8232e-02 -1.7668e-01
 -8.8153e-01 -3.3895e-01 -3.5481e-02 -5.5095e-01 -1.6899e-02 -4.3982e-01
  3.9004e-02  4.0447e-01 -2.5880e-01  6.4594e-01  2.6641e-01  2.8009e-01
 -2.4625e-02  6.3302e-01 -3.1700e-01  1.0271e-01  3.0886e-01  9.7792e-02
 -3.8227e-01  8.6552e-02  4.7075e-02  2.3511e-01 -3.2127e-01 -2.8538e-01
  1.6670e-01 -4.9707e-03 -6.2714e-01 -2.4904e-01  2.9713e-01  1.4379e-01
 -1.2325e-01 -5.8178e-02 -1.0290e-03 -8.2126e-02  3.6935e-01 -5.8442e-04
  3.4286e-01  2.842

In [2]:
## 参数设置

In [3]:
# Model dimensions.
VOCAB_SIZE = 400001     # number of rows in the nn.Embedding table
embedding_size = 200    # input width of the preprocessing layer
hidden_size = 100       # output width of the preprocessing layer
classes = 3             # output width of the final linear classifier

# Sequence lengths handed to the dataset loader; maxa_len is also the
# Conv1d in_channels and window is the Conv1d out_channels.
maxq_len = 20
maxa_len = 20
window = 3

# Optimisation settings: epochs, DataLoader batch size, Adam learning rate.
EPOCH = 3
BATCH_SIZE = 2
LR = 0.001


In [4]:
# Array that is later copied into the model's nn.Embedding weights.
# NOTE: pickle.load can execute arbitrary code, so only open trusted files.
embed_file = './data/embed.pkl'
with open(embed_file, mode='rb') as fh:
    embed = pickle.load(fh)

# 模型结构
## 预处理层

<img src="./imgs/预处理.png"  width="400" height="400" align="left" />


In [5]:
class Preprocess(nn.Module):
    """Gated projection of the last dimension of the input.

    Computes ``sigmoid(x @ Wi + bi) * tanh(x @ Wu + bu)``.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # nn.Parameter registers each tensor as a trainable module parameter.
        # The creation order (Wi, bi, Wu, bu) fixes how a seeded RNG is
        # consumed, so it is kept as is.
        self.Wi = nn.Parameter(torch.randn(in_features, out_features))
        self.bi = nn.Parameter(torch.randn(out_features))

        self.Wu = nn.Parameter(torch.randn(in_features, out_features))
        self.bu = nn.Parameter(torch.randn(out_features))

    def forward(self, x):
        """Return the element-wise product of the sigmoid gate and tanh value."""
        # The bias vectors broadcast over every leading dimension.
        gate = torch.sigmoid(x @ self.Wi + self.bi)
        value = torch.tanh(x @ self.Wu + self.bu)
        return gate * value

## 注意力层
<img src="./imgs/注意力.png"  width="400" height="400" align="left" />

In [6]:
class Attention(nn.Module):
    """Attention-weighted sum of question vectors for each answer position.

    Scores are ``(Q @ Wg + bg) @ A^T``; a softmax over the question axis
    turns them into weights, which are then used to sum the rows of ``Q``.
    """

    def __init__(self):
        super().__init__()
        self.Wg = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.bg = nn.Parameter(torch.randn(hidden_size))

    def forward(self, Q, A):
        """Align ``Q`` to ``A``.

        Args:
            Q: 3-D tensor (batch, len_q, hidden_size).
            A: 3-D tensor (batch, len_a, hidden_size).

        Returns:
            Tensor (batch, len_a, hidden_size).
        """
        # Linear map of the question vectors; the bias broadcasts.
        projected = Q @ self.Wg + self.bg

        # (batch, len_q, len_a) score matrix, normalised along len_q (dim=1).
        scores = projected @ A.permute(0, 2, 1)
        weights = torch.softmax(scores, dim=1)

        # (batch, len_a, len_q) @ (batch, len_q, hidden_size)
        return weights.permute(0, 2, 1) @ Q

## 比较层
<img src="./imgs/比较.png"  width="400" height="400" align="left" />


In [7]:
class Compare(nn.Module):
    """Compare two tensors through squared-difference and product features.

    Computes ``relu(cat([(h - a) ** 2, h * a], dim=2) @ W + b)``.
    """

    def __init__(self):
        super().__init__()
        self.W = nn.Parameter(torch.randn(2 * hidden_size, hidden_size))
        self.b = nn.Parameter(torch.randn(hidden_size))

    def forward(self, h, a):
        """Return the comparison features for ``h`` and ``a``.

        Both inputs are concatenated along dim 2, so they must be at least
        3-D with a last dimension of ``hidden_size``.
        """
        diff = h - a
        features = torch.cat((diff * diff, h * a), dim=2)
        # The bias broadcasts across the leading dimensions.
        return torch.relu(features @ self.W + self.b)
    

In [8]:
class CompAgg(nn.Module):
    """Full pipeline: embed, preprocess, attend, compare, Conv1d, linear.

    ``forward`` returns one row of ``classes`` scores per example.
    """

    def __init__(self):
        super().__init__()
        # Embedding table overwritten with the module-level `embed` array.
        self.embedding = nn.Embedding(VOCAB_SIZE, embedding_size)
        self.embedding.weight.data.copy_(torch.from_numpy(embed))
        self.preprocess = Preprocess(embedding_size, hidden_size)
        self.attention = Attention()
        self.compare = Compare()
        # The Conv1d takes the maxa_len answer positions as input channels
        # and convolves along the hidden dimension.
        self.aggregate = nn.Conv1d(
            in_channels=maxa_len,
            out_channels=window,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.predict = nn.Linear(window * hidden_size, classes)

    def forward(self, Q, A):
        """Score a batch of (Q, A) index tensors.

        Args:
            Q: (batch_size, seq_len_q) token ids.
            A: (batch_size, seq_len_a) token ids.

        Returns:
            (batch_size, classes) unnormalised scores.
        """
        # Embedding gives (batch_size, seq_len, embedding_size); the shared
        # preprocessing layer maps that to (batch_size, seq_len, hidden_size).
        q_bar, a_bar = (
            self.preprocess(self.embedding(ids)) for ids in (Q, A)
        )

        # (batch_size, seq_len_a, hidden_size)
        aligned = self.attention(q_bar, a_bar)

        # (batch_size, seq_len_a, hidden_size)
        compared = self.compare(aligned, a_bar)

        # (batch_size, window, hidden_size), flattened per example.
        pooled = self.aggregate(compared)
        flat = pooled.view(-1, window * hidden_size)

        # (batch_size, classes)
        return self.predict(flat)

In [9]:
class myDataset(Dataset):
    """Dataset over the three parallel sequences returned by ``load_char_data``.

    Args:
        filepath: path handed to ``load_char_data``.
        len_a: second argument forwarded to ``load_char_data``.
        len_b: third argument forwarded to ``load_char_data``.
    """

    def __init__(self, filepath, len_a, len_b):
        self.path = filepath
        loaded = load_char_data(filepath, len_a, len_b)
        self.a_index, self.b_index, self.label = loaded

    def __len__(self):
        """Number of examples, taken from the first sequence."""
        return len(self.a_index)

    def __getitem__(self, idx):
        """Return the ``(a, b, label)`` triple stored at position ``idx``."""
        first = self.a_index[idx]
        second = self.b_index[idx]
        target = self.label[idx]
        return first, second, target

### 函数模型举例

In [10]:
# Load the dev split and display one example as a notebook cell result.
train_path = './data/SNLI/snli-dev.txt'
train_data = myDataset(train_path, len_a=maxq_len, len_b=maxa_len)
train_data[10]

(array([   56,   462,  2123,     4,  6048,   777,   283,   536,   111,
         2759,   550,  1465,  7814,     6, 14781,     0,     0,     0,
            0,     0]),
 array([ 2927, 16789,    67,     4,  1992,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0]),
 1)

In [13]:
# Dataset files for training and evaluation.
train_path = './data/SNLI/snli-train.txt'
test_path = './data/SNLI/snli-test.txt'

# Datasets and their batch iterators (both are shuffled).
train_data = myDataset(train_path, maxq_len, maxa_len)
test_data = myDataset(test_path, maxq_len, maxa_len)
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=BATCH_SIZE, shuffle=True)

# Use the first CUDA device when one is available, otherwise the CPU.
# To force CPU execution, build the device with torch.device('cpu') instead.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = CompAgg().to(device)

# Adam over all model parameters, with cross-entropy as the training loss.
optimizer = torch.optim.Adam(net.parameters(), lr=LR)
loss_func = torch.nn.CrossEntropyLoss()

In [None]:
# Train for EPOCH passes over train_loader; every 200 steps, measure accuracy
# on the whole test_loader and print it with the current training loss.
for epoch in range(EPOCH):
    for step, (text_a, text_b, label) in enumerate(train_loader):
        # 1. Move the index batches and labels to the device as int64 tensors.
        a = text_a.to(device).long()
        b = text_b.to(device).long()
        l = torch.LongTensor(label).to(device)

        # 2. Forward pass.
        out = net(a, b)

        # 3. Loss between the predicted scores and the labels.
        loss = loss_func(out, l)

        # 4. Standard optimisation step: clear gradients, backprop, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 200 == 0:
            total = 0
            correct = 0
            # Evaluation needs no gradients: no_grad() stops autograd from
            # recording a graph for every test batch, and eval() puts the
            # model in inference mode for the duration of the pass.
            net.eval()
            with torch.no_grad():
                for test_a, test_b, test_l in test_loader:
                    tst_a = test_a.to(device).long()
                    tst_b = test_b.to(device).long()
                    tst_l = torch.LongTensor(test_l).to(device)
                    out = net(tst_a, tst_b)
                    out = torch.argmax(out, dim=1).long()
                    # Only batches whose predictions line up with the labels
                    # are counted.
                    if out.size() == tst_l.size():
                        total += tst_l.size(0)
                        correct += (out == tst_l).sum().item()
            # Back to training mode before the next optimisation step.
            net.train()

            # Report NaN instead of raising ZeroDivisionError when no test
            # batch was counted.
            accuracy = correct * 1.0 / total if total else float('nan')

            print('[Epoch ~ Step]:', epoch + 1, '~', step + 1, '训练loss:', loss.item())
            print('[Epoch]:', epoch + 1, '测试集准确率: ', accuracy)