In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torchtext import data
from torchtext.vocab import Vectors
from torchtext.data import Field, Example, Iterator

In [2]:
# Hyper-parameters shared by the cells below.
lr=0.0004          # learning rate handed to the Adam optimizer
hidden_size=100    # hidden units per direction in every BiLSTM of the ESIM model
linear_size=100    # width of the second hidden layer in the classifier head
batch_size=100     # mini-batch size
classes=3          # number of output classes produced by the final linear layer
EPOCH=3            # number of passes over the training iterator

In [12]:
class ESIM(nn.Module):
    """ESIM-style sentence-pair classifier (BiLSTM encoding, soft alignment,
    BiLSTM composition, pooling, MLP).

    Args:
        TEXT: object exposing ``TEXT.vocab.vectors``, a 2-D tensor of
            pretrained embeddings with shape (vocab_size, embedding_dim).
        hidden_dim: hidden units per LSTM direction. Defaults to the
            notebook-level ``hidden_size``.
        linear_dim: width of the second hidden layer of the classifier.
            Defaults to the notebook-level ``linear_size``.
        num_classes: number of output logits. Defaults to the notebook-level
            ``classes``.

    Note:
        Attention is computed over every position of both sequences; padding
        positions are not masked out.
    """

    def __init__(self, TEXT, hidden_dim=None, linear_dim=None, num_classes=None):
        super(ESIM, self).__init__()
        # Fall back to the notebook globals only when no override is given, so
        # the existing call ``ESIM(TEXT)`` behaves exactly as before.
        hidden_dim = hidden_size if hidden_dim is None else hidden_dim
        linear_dim = linear_size if linear_dim is None else linear_dim
        num_classes = classes if num_classes is None else num_classes

        # The embedding table has the same (vocab_size, embedding_dim) shape as
        # the pretrained vectors and is initialised with a copy of them.
        vocab_size, embed_dim = TEXT.vocab.vectors.size()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.embedding.weight.data.copy_(TEXT.vocab.vectors)

        # Input-encoding BiLSTMs (one per sentence).
        self.A_bilstm_input = nn.LSTM(embed_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.B_bilstm_input = nn.LSTM(embed_dim, hidden_dim, batch_first=True, bidirectional=True)

        # Inference-composition BiLSTMs. Their input is the concatenation of
        # four 2*hidden_dim feature blocks, hence 8*hidden_dim.
        self.A_bilstm_infer = nn.LSTM(8 * hidden_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.B_bilstm_infer = nn.LSTM(8 * hidden_dim, hidden_dim, batch_first=True, bidirectional=True)

        # Classifier over [avg_a, max_a, avg_b, max_b], each 2*hidden_dim wide.
        self.linear = nn.Sequential(
            nn.Linear(8 * hidden_dim, 2 * hidden_dim),
            nn.ReLU(),
            nn.Linear(2 * hidden_dim, linear_dim),
            nn.ReLU(),
            nn.Linear(linear_dim, num_classes)
        )

    def forward(self, a, b):
        """Return class logits for a batch of sentence pairs.

        Args:
            a: token-index tensor for the first sentences, batch-first
                (batch, len_a).
            b: token-index tensor for the second sentences, batch-first
                (batch, len_b). ``len_b`` may differ from ``len_a``.

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised scores.
        """
        emb_a = self.embedding(a)
        emb_b = self.embedding(b)

        # Input encoding: (batch, len, 2*hidden).
        a_bar, _ = self.A_bilstm_input(emb_a)
        b_bar, _ = self.B_bilstm_input(emb_b)

        # Alignment scores e[n, i, j] = <a_bar[n, i], b_bar[n, j]>.
        e = torch.matmul(a_bar, b_bar.transpose(1, 2))

        # For each a-position, a weighted sum of b (weights normalised over j).
        a_tilde = torch.matmul(torch.softmax(e, dim=2), b_bar)
        # For each b-position, a weighted sum of a (weights normalised over i).
        # The scores must be transposed to (batch, len_b, len_a) first;
        # otherwise the weights are indexed on the wrong axis and the product
        # is only shape-valid when len_a == len_b.
        b_tilde = torch.matmul(torch.softmax(e, dim=1).transpose(1, 2), a_bar)

        # Enhancement: concatenate original, aligned, difference and product.
        ma = torch.cat([a_bar, a_tilde, a_bar - a_tilde, a_bar * a_tilde], dim=2)
        mb = torch.cat([b_bar, b_tilde, b_bar - b_tilde, b_bar * b_tilde], dim=2)

        # Inference composition.
        va, _ = self.A_bilstm_infer(ma)
        vb, _ = self.B_bilstm_infer(mb)

        # Pool over the time dimension (average and max) for both sentences.
        va_avg = torch.mean(va, dim=1)
        va_max = torch.max(va, dim=1)[0]
        vb_avg = torch.mean(vb, dim=1)
        vb_max = torch.max(vb, dim=1)[0]
        v = torch.cat([va_avg, va_max, vb_avg, vb_max], dim=1)

        # Prediction layer.
        return self.linear(v)

In [4]:
def text_tokenize(x):
    """Split a sentence into tokens on runs of whitespace."""
    tokens = x.split()
    return tokens

def label_tokenize(y):
    """Identity "tokenizer" for labels: hand the value back untouched."""
    label = y
    return label

def get_dataset(csv_data, text_field, label_field):
    """Turn a DataFrame of sentence pairs into torchtext examples.

    Args:
        csv_data: pandas DataFrame with the columns ``sentence1``,
            ``sentence2`` and ``gold_label``.
        text_field: field applied to both sentence columns.
        label_field: field applied to the label column.

    Returns:
        A tuple ``(examples, fields)``: one example per row, plus the
        ``(name, field)`` list that was used to build them.
    """
    fields = [
        ('sentence1', text_field),
        ('sentence2', text_field),
        ('gold_label', label_field),
    ]
    rows = zip(csv_data['sentence1'], csv_data['sentence2'], csv_data['gold_label'])
    examples = [
        data.Example.fromlist([premise, hypothesis, gold], fields)
        for premise, hypothesis, gold in rows
    ]
    return examples, fields

#1.1 Preprocessing for the sentence text: tokenised with text_tokenize, fix_length=40
# NOTE(review): the glove.6B vectors loaded below are believed to be uncased; confirm the
# input text is already lower-cased (or consider lower=True) so tokens can match them.
TEXT=data.Field(sequential=True,tokenize=text_tokenize,fix_length=40)

#1.2 Preprocessing for the labels: non-sequential and used without a vocabulary
LABEL=data.Field(sequential=False,tokenize=label_tokenize,use_vocab=False)

# Tab-separated train/test files read into DataFrames
train_data=pd.read_csv('./data/SNLI/snli-train.txt',sep='\t')
test_data=pd.read_csv('./data/SNLI/snli-test.txt',sep='\t')

#2.1 Build the lists of examples
train_examples,train_fields=get_dataset(train_data,TEXT,LABEL)
test_examples,test_fields=get_dataset(test_data,TEXT,LABEL)

#2.2 Wrap the examples in Dataset objects
train=data.Dataset(train_examples, train_fields)
test=data.Dataset(test_examples, test_fields)

#3 Load the pretrained word vectors and build the vocabulary from the training set only
vectors = Vectors(name='./data/glove.6B.300d.txt')
TEXT.build_vocab(train,vectors=vectors)



In [5]:
#4 Build the batch iterators.
# Use the GPU only when one is actually present, so batches land on the same
# device the model is moved to and the notebook still runs on CPU-only machines.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# batch_size comes from the hyper-parameter cell instead of a repeated literal.
train_iter = Iterator(train, batch_size=batch_size, sort=False, device=device, repeat=False)
test_iter = Iterator(test, batch_size=batch_size, device=device, sort=False, repeat=False)



In [13]:
#5 Run on the first GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

net = ESIM(TEXT)
net = net.to(device)

#6 Loss function and optimiser.
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)

In [None]:
def _evaluate_accuracy(model, iterator):
    """Count correct predictions of ``model`` over every batch of ``iterator``.

    Runs without gradient tracking and in eval mode, then restores the mode
    the model was in (training must continue in train mode afterwards).

    Returns:
        A tuple ``(correct, total)`` of integer counts.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for tst_batch in iterator:
                # Transpose so the model receives batch-first index tensors.
                tst_a = tst_batch.sentence1.t()
                tst_b = tst_batch.sentence2.t()
                tst_l = tst_batch.gold_label
                pred = torch.argmax(model(tst_a, tst_b), dim=1).long()
                # Only count batches whose predictions line up with the labels.
                if pred.size() == tst_l.size():
                    total += tst_l.size(0)
                    correct += (pred == tst_l).sum().item()
    finally:
        if was_training:
            model.train()
    return correct, total


for epoch in range(EPOCH):
    for step, batch in enumerate(train_iter):
        # 1. Fetch the index tensors, transposed to batch-first for the model.
        a = batch.sentence1.t()
        b = batch.sentence2.t()
        l = batch.gold_label

        # 2. Forward pass.
        out = net(a, b)

        # 3. Loss between the logits and the gold labels.
        loss = loss_func(out, l)

        # 4. Standard optimisation step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Every 200 steps report the current training loss and test accuracy.
        if step % 200 == 0:
            correct, total = _evaluate_accuracy(net, test_iter)
            # Avoid a ZeroDivisionError if no test batch was counted.
            accuracy = correct / total if total else float('nan')

            print('[Epoch ~ Step]:',epoch+1,'~',step+1,'训练loss:',loss.item())
            print('[Epoch]:',epoch+1,'测试集准确率: ',accuracy)