In [92]:
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [93]:
# Load the serialized features and labels from disk.
# torch.load unpickles the file contents, so only point it at files you trust.
dataset_x = torch.load("dataset/dataset_x")
dataset_y = torch.load("dataset/dataset_y")

# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    dataset_x,
    dataset_y,
    test_size=0.2,
    random_state=1,
)

In [94]:
class BiRNN(nn.Module):
    """Bidirectional multi-layer LSTM encoder followed by a small classifier head.

    Args:
        embed_size: Size of each input feature vector (the LSTM ``input_size``).
        num_hiddens: Number of hidden units per LSTM direction.
        num_layers: Number of stacked LSTM layers.
        classes: Number of logits produced by the classifier.
    """

    def __init__(self, embed_size, num_hiddens, num_layers, classes):
        super().__init__()

        # bidirectional=True turns the LSTM into a bidirectional recurrent network,
        # so every time step of its output has 2 * num_hiddens features.
        self.encoder = nn.LSTM(
            input_size=embed_size,
            hidden_size=num_hiddens,
            num_layers=num_layers,
            bidirectional=True,
        )

        # The classifier consumes the encoder outputs of the first and the last
        # time step concatenated together, hence 4 * num_hiddens input features.
        encoder_width = 2 * num_hiddens
        self.classifier = nn.Sequential(
            nn.Linear(2 * encoder_width, encoder_width),
            # nn.BatchNorm1d(2*num_hiddens),
            nn.Sigmoid(),
            nn.Linear(encoder_width, classes),
        )

    def forward(self, inputs):
        """Return one row of ``classes`` logits per sequence in the batch.

        Args:
            inputs: Tensor laid out as (batch size, number of words, embedding dim).

        Returns:
            Tensor of shape (batch size, classes) holding unnormalized scores.
        """
        # The LSTM expects the sequence axis first, so swap the batch and sequence
        # axes: (batch, words, embed) -> (words, batch, embed).
        seq_first = inputs.transpose(0, 1)
        # Per-step encoder outputs: (words, batch, 2 * num_hiddens). The final
        # (h, c) state tuple is not needed here.
        step_outputs, _ = self.encoder(seq_first)
        # Join the first and last time steps along the feature axis:
        # (batch, 4 * num_hiddens).
        first_step, last_step = step_outputs[0], step_outputs[-1]
        features = torch.cat([first_step, last_step], dim=-1)
        return self.classifier(features)

In [95]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 100-dim input vectors, 150 hidden units per direction, 5 stacked LSTM layers,
# 5 output classes; the module is moved to the selected device right away.
model = BiRNN(embed_size=100, num_hiddens=150, num_layers=5, classes=5).to(device)

In [96]:
# Wrap the first sample in a list so the resulting tensor gains a leading
# batch axis of size 1.
sentences = torch.tensor([dataset_x[0]])
# Last expression of the cell: its value is displayed as the cell output.
sentences.shape

torch.Size([1, 11, 100])

In [97]:
# Smoke test: push the single-sample batch through the freshly built model and
# print the raw logits it returns.
print(model(sentences.to(device)))

tensor([[ 0.2949,  0.1993,  0.1801,  0.3616, -0.3285]], device='cuda:0',
       grad_fn=<AddmmBackward>)


In [98]:
# Learning rate handed to the Adam optimizer below.
# NOTE(review): the logged training loss rises from epoch to epoch with this
# value — consider trying a smaller rate.
lr = 0.01

In [99]:
# Hand Adam only the parameters that actually require gradients.
optimizer = torch.optim.Adam(
    (param for param in model.parameters() if param.requires_grad),
    lr=lr,
)
# Cross-entropy criterion applied to the raw logits and integer class indices.
loss = nn.CrossEntropyLoss().to(device)

In [100]:
def train(dataset_x,dataset_y,num_epochs,batch_size,optimizer,loss):
    """Train the notebook-level ``model`` with one optimizer step per sample.

    Relies on the notebook globals ``model`` and ``device``. After every epoch
    the mean loss and the accuracy over ``dataset_x`` are printed.

    Args:
        dataset_x: Indexable collection of samples; each item is wrapped in a
            list to form a batch of one before being fed to the model.
        dataset_y: Indexable collection of label rows aligned with
            ``dataset_x``; the class index is taken as the argmax of each row.
        num_epochs: Number of full passes over the data.
        batch_size: Accepted for interface compatibility only. Samples are
            always processed one at a time; a warning is emitted when a value
            other than 1 is passed so the mismatch is not silent.
        optimizer: Optimizer already bound to the model parameters.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.

    Raises:
        ZeroDivisionError: If ``dataset_x`` is empty and ``num_epochs`` > 0.
    """
    import warnings

    if batch_size != 1:
        warnings.warn(
            "batch_size is ignored: train() always updates on one sample at a time",
            stacklevel=2,
        )

    num_samples = len(dataset_x)
    # Make sure layers with mode-dependent behavior are in training mode even
    # if the model was previously switched to eval().
    model.train()
    for epoch in range(num_epochs):
        print("epoch",epoch)
        train_l_sum, train_acc_sum = 0.0, 0.0
        for i in range(num_samples):
            # Wrapping in a list adds the leading batch axis of size 1.
            X = torch.tensor([dataset_x[i]]).to(device)
            y = torch.tensor([dataset_y[i]]).to(device)
            y_hat = model(X)
            # Convert the label row to the class index CrossEntropy-style
            # criteria expect.
            y = y.argmax(dim=1).long()
            loss_ = loss(y_hat, y)

            optimizer.zero_grad()
            loss_.backward()
            optimizer.step()

            train_l_sum += loss_.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()

        print(f"train loss:{train_l_sum/num_samples}")
        print(f"train acc:{train_acc_sum/num_samples}")

def test(dataset_x,dataset_y,loss=None):
    train_acc_sum= 0.0
    for i in range(len(dataset_x)):
        X = torch.tensor([dataset_x[i]])
        y = torch.tensor([dataset_y[i]])
        X = X.to(device)
        y = y.to(device)
        y_hat = model(X)
        y = y.argmax(dim=1).long()
        train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
    print(f"test acc:{train_acc_sum/len(dataset_x)}")

In [101]:
# Kick off training on the training split, one sample per optimizer step.
print("training... ")
train(
    x_train,
    y_train,
    num_epochs=10,
    batch_size=1,
    optimizer=optimizer,
    loss=loss,
)

training... 
epoch 0
train loss:1.205892262746427
train acc:0.6134539692276236
epoch 1
train loss:1.2647147973276145
train acc:0.5963298062669324
epoch 2
train loss:1.283054566594153
train acc:0.5910647651178245
epoch 3


KeyboardInterrupt: 

In [None]:
# Serialize the whole model object (not only its state_dict) to disk.
# NOTE(review): the ".npy" suffix suggests a NumPy array file, which is not what
# torch.save writes — confirm what any loader expects before renaming the path.
torch.save(model,"models/BiLSTM_v2.npy")

In [None]:
# Report accuracy on the held-out split.
print("testing... ")
test(dataset_x=x_test, dataset_y=y_test)

testing... 
test acc:0.7076262523001431
