In [26]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that the randomly
# initialised model parameters are the same on every fresh run.
SEED = 1
torch.manual_seed(SEED)

<torch._C.Generator at 0x1d987325af0>

In [33]:
def prepare_char_sequence(word, to_idx):
    """Convert an iterable of symbols into a 1-D ``torch.long`` index tensor.

    Args:
        word: Iterable of symbols (e.g. the characters of a string).
        to_idx: Mapping from each symbol to its integer index.

    Returns:
        A ``torch.long`` tensor with one index per symbol, in input order.

    Raises:
        KeyError: If a symbol is missing from ``to_idx`` (for a plain dict).
    """
    indices = []
    for symbol in word:
        indices.append(to_idx[symbol])
    return torch.tensor(indices, dtype=torch.long)

def prepare_sequence(seq, to_idx, char_idx=None):
    """Encode a sentence at both the word level and the character level.

    Args:
        seq: Sequence of words (strings).
        to_idx: Mapping from each word to its integer index.
        char_idx: Optional mapping from each character to its integer index.
            When omitted, the module-level ``char_to_idx`` vocabulary is used,
            which is what this function always did before the parameter
            existed.

    Returns:
        A pair ``(word_idxs, char_idxs)``: ``word_idxs`` is a ``torch.long``
        tensor with one index per word, and ``char_idxs`` is a list holding,
        for each word, the tensor produced by ``prepare_char_sequence``.
    """
    # Resolve the character vocabulary once; the global is only a fallback so
    # callers can supply their own mapping instead of relying on hidden state.
    char_vocab = char_to_idx if char_idx is None else char_idx
    word_idxs = [to_idx[word] for word in seq]
    char_idxs = [prepare_char_sequence(word, char_vocab) for word in seq]
    return torch.tensor(word_idxs, dtype=torch.long), char_idxs

# Toy corpus: each entry pairs a tokenised sentence with one tag per token.
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"]),
]

# Build the word and character vocabularies. Every new word / character gets
# the next free integer id, so ids follow the order of first appearance.
word_to_idx = {}
char_to_idx = {}
for sen, tags in training_data:
    for word in sen:
        # setdefault leaves an existing id alone and inserts len(...) otherwise.
        word_to_idx.setdefault(word, len(word_to_idx))
        for char in word:
            char_to_idx.setdefault(char, len(char_to_idx))

print(word_to_idx)
print(char_to_idx)

# Tag vocabulary is fixed by hand.
tag_to_idx = {"DET": 0, "NN": 1, "V": 2}

# Model hyperparameters.
WORD_EMBEDDING_DIM = 5
CHAR_EMBEDDING_DIM = 5
WORD_HIDDEN_DIM = 6
CHAR_HIDDEN_DIM = 3

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
{'T': 0, 'h': 1, 'e': 2, 'd': 3, 'o': 4, 'g': 5, 'a': 6, 't': 7, 'p': 8, 'l': 9, 'E': 10, 'v': 11, 'r': 12, 'y': 13, 'b': 14, 'k': 15}


In [83]:
class LSTMTagger(nn.Module):
    """Sequence tagger combining word-level and character-level features.

    ``char_lstm`` reads each word character by character; its last output is
    concatenated with the word's embedding and the result is fed to
    ``word_lstm``. The character LSTM therefore has no linear or softmax layer
    of its own: it is trained through the loss computed on the output of the
    word-level LSTM.
    """

    def __init__(self, char_embedding_dim, word_embedding_dim,
                 char_hidden_dim, word_hidden_dim, char_size,
                 vocab_size, tagset_size):
        """Create the embeddings, the two LSTMs and the output projection.

        Args:
            char_embedding_dim: Size of each character embedding vector.
            word_embedding_dim: Size of each word embedding vector.
            char_hidden_dim: Hidden size of the character-level LSTM.
            word_hidden_dim: Hidden size of the word-level LSTM.
            char_size: Number of entries in the character embedding table.
            vocab_size: Number of entries in the word embedding table.
            tagset_size: Number of output tags.
        """
        super(LSTMTagger, self).__init__()
        self.char_embedding_dim = char_embedding_dim
        self.word_embedding_dim = word_embedding_dim
        self.char_hidden_dim = char_hidden_dim
        self.word_hidden_dim = word_hidden_dim

        self.char_embeddings = nn.Embedding(char_size, char_embedding_dim)
        self.word_embeddings = nn.Embedding(vocab_size, word_embedding_dim)

        self.char_lstm = nn.LSTM(char_embedding_dim, char_hidden_dim)
        # The word LSTM sees the word embedding concatenated with the
        # character-level feature vector of the same word.
        self.word_lstm = nn.LSTM(word_embedding_dim + char_hidden_dim, word_hidden_dim)

        self.hidden2tag = nn.Linear(word_hidden_dim, tagset_size)
        self.char_hidden = self.init_hidden(self.char_hidden_dim)
        self.word_hidden = self.init_hidden(self.word_hidden_dim)

    def init_hidden(self, hidden_dim):
        """Return a zero-filled ``(h_0, c_0)`` LSTM state pair.

        Args:
            hidden_dim: Hidden size of the LSTM the state is meant for.

        Returns:
            A tuple of two tensors of shape ``(1, 1, hidden_dim)``, created
            with the dtype and device of the model's parameters so the state
            stays usable after ``model.double()`` or ``model.to(device)``.
        """
        reference = self.hidden2tag.weight
        return (torch.zeros(1, 1, hidden_dim, dtype=reference.dtype, device=reference.device),
                torch.zeros(1, 1, hidden_dim, dtype=reference.dtype, device=reference.device))

    def forward(self, sentence):
        """Compute log-probabilities over tags for every word of a sentence.

        Args:
            sentence: Indexable pair where ``sentence[0]`` is a 1-D tensor of
                word indices and ``sentence[1]`` is a sequence holding, for
                each word, a 1-D tensor of its character indices.

        Returns:
            Tensor of shape ``(len(sentence[0]), tagset_size)`` containing the
            log-softmax tag scores of each word.
        """
        word_idxs, words_as_chars = sentence[0], sentence[1]
        seq_len = len(word_idxs)

        char_lstm_result = []
        for word in words_as_chars:
            # Run the first LSTM over the characters of the word, starting
            # from a zero state, and keep its last output as the word's
            # character-level feature vector.
            self.char_hidden = self.init_hidden(self.char_hidden_dim)
            char_embeds = self.char_embeddings(word).view(len(word), 1, -1)
            char_lstm_out, self.char_hidden = self.char_lstm(char_embeds, self.char_hidden)
            char_lstm_result.append(char_lstm_out[-1])
        # Stack the per-word feature vectors along a new leading axis.
        char_lstm_result = torch.stack(char_lstm_result)

        word_embeds = self.word_embeddings(word_idxs).view(seq_len, 1, -1)
        lstm_in = torch.cat((word_embeds, char_lstm_result), 2)

        # Start every sentence from a zero state and store the resulting state
        # in the same attribute that supplied the initial one, so the state
        # read and the state written can no longer drift apart.
        self.word_hidden = self.init_hidden(self.word_hidden_dim)
        lstm_out, self.word_hidden = self.word_lstm(lstm_in, self.word_hidden)

        tag_space = self.hidden2tag(lstm_out.view(seq_len, -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores

In [84]:
# Build the tagger; table sizes come from the vocabularies built earlier.
model = LSTMTagger(
    char_embedding_dim=CHAR_EMBEDDING_DIM,
    word_embedding_dim=WORD_EMBEDDING_DIM,
    char_hidden_dim=CHAR_HIDDEN_DIM,
    word_hidden_dim=WORD_HIDDEN_DIM,
    char_size=len(char_to_idx),
    vocab_size=len(word_to_idx),
    tagset_size=len(tag_to_idx),
)
# The model emits log-probabilities, so negative log-likelihood is the loss.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [85]:
# Baseline: tag scores of the untrained model for the first training sentence.
# Gradients are not needed for this inspection, so tracking is disabled.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_idx)
    tag_scores = model(inputs)
print(tag_scores)

tensor([[-0.8977, -1.5980, -0.9411],
        [-0.9097, -1.5934, -0.9311],
        [-0.8446, -1.5566, -1.0233],
        [-0.8484, -1.6203, -0.9833],
        [-0.9211, -1.5732, -0.9300]])


In [86]:
%%time
# Number of passes over the (tiny) training set.
NUM_EPOCHS = 300

for epoch in range(NUM_EPOCHS):
    for sentence, tags in training_data:
        # Gradients accumulate across backward() calls; clear them first.
        model.zero_grad()
        # Start each sentence from a zero word-LSTM state. The state lives in
        # `word_hidden` and must be sized with the word LSTM's hidden
        # dimension, not the embedding dimension.
        model.word_hidden = model.init_hidden(WORD_HIDDEN_DIM)

        sentence_in = prepare_sequence(sentence, word_to_idx)
        # One target tag index per word of the sentence.
        targets = torch.tensor([tag_to_idx[tag] for tag in tags], dtype=torch.long)

        tag_scores = model(sentence_in)
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

# Tag scores for the first training sentence after training; each row holds
# the log-probabilities of the tags for one word.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_idx)
    tag_scores = model(inputs)
    print(tag_scores)

tensor([[-0.0342, -5.3100, -3.5510],
        [-5.3969, -0.0221, -4.0582],
        [-2.8627, -3.0031, -0.1129],
        [-0.0303, -5.7669, -3.6241],
        [-7.0610, -0.0192, -4.0078]])
Wall time: 4.37 s
