In [1]:
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import time
import math
import numpy as np

import torch as t
import torch.nn as nn
from torch.autograd import Variable as V
import torch.utils.data as Data
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from pyfile.text_loader import TextDataset

#### 1. Parameter settings

In [2]:
# --- model ---
n_characters = 128   # vocabulary size: one class per 7-bit ASCII code point
hidden_size = 100    # width of the embedding and of every GRU layer
n_layers = 3         # number of stacked GRU layers

# --- training ---
n_epochs = 2         # full passes over the dataset
batch_size = 1       # lines handed to the DataLoader per step


#### 2. define model

In [3]:
class RNN(nn.Module):
    """Character-level GRU model that consumes one character index per call.

    The network is embedding -> stacked GRU -> linear layer. ``forward``
    handles exactly one time step with a batch of one, so callers loop over
    the characters of a string and thread the hidden state through.

    Args:
        input_size: number of distinct input symbols (embedding rows).
        hidden_size: embedding dimension and GRU hidden width.
        output_size: number of output classes produced by the final layer.
        n_layers: number of stacked GRU layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.embed = nn.Embedding(num_embeddings=input_size,
                                  embedding_dim=hidden_size)

        self.gru = nn.GRU(input_size=hidden_size,
                          hidden_size=hidden_size,
                          num_layers=n_layers)

        self.fc = nn.Linear(in_features=hidden_size,
                            out_features=output_size)

    # This runs one step at a time.
    # It is extremely slow; please do not use it in practice.
    # A practical version needs (1) batching and (2) data parallelism.
    def forward(self, inputs, hidden):
        """Run a single time step.

        Args:
            inputs: tensor holding one character index (any shape with a
                single element; it is reshaped internally).
            hidden: GRU state of shape (n_layers, 1, hidden_size).

        Returns:
            (outputs, hidden): scores of shape (1, output_size) and the
            updated GRU state.
        """
        embed = self.embed(inputs.view(1, -1))  # S(=1) * B(=1) * embedding_size
        embed = embed.view(1, 1, -1)            # force S(=1) * B(=1) * embedding_size

        outputs, hidden = self.gru(embed, hidden)
        outputs = self.fc(outputs.view(1, -1))  # B(=1) * output_size
        return outputs, hidden

    def init_hidden(self):
        """Return an all-zero GRU state of shape (n_layers, 1, hidden_size).

        The state is moved to the GPU whenever CUDA is available.
        """
        # Use the instance's own width; reading a module-level `hidden_size`
        # breaks as soon as the model is built with a different value.
        hidden = V(t.zeros(self.n_layers, 1, self.hidden_size))

        if t.cuda.is_available():
            return hidden.cuda()
        return hidden
  

In [4]:
# Helper functions
def str2tensor(strings):
    """Encode a string as a 1-D LongTensor of its characters' code points.

    The tensor is moved to the GPU when CUDA is available and is returned
    wrapped in a ``Variable``.
    """
    codes = t.LongTensor(list(map(ord, strings)))
    return V(codes.cuda() if t.cuda.is_available() else codes)


def generate(decoder, prime_str='A', predict_len=100, temperature=0.8):
    """Sample text from ``decoder`` one character at a time.

    Args:
        decoder: model exposing ``init_hidden()`` and a call signature
            ``decoder(char_index, hidden) -> (scores, hidden)``.
        prime_str: non-empty seed text; it warms up the hidden state and is
            the prefix of the returned string.
        predict_len: number of characters to sample after the seed.
        temperature: positive divisor applied to the scores before sampling;
            smaller values make the sampling more conservative.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty or ``temperature`` is not
            positive.
    """
    # Validate up front so bad arguments fail with a clear message instead of
    # an index error or inf/nan sampling weights further down.
    if len(prime_str) == 0:
        raise ValueError("prime_str must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    hidden = decoder.init_hidden()
    prime_input = str2tensor(prime_str)
    predicted = prime_str

    # Use the priming string to "build up" the hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)

    inp = prime_input[-1]

    for _ in range(predict_len):
        output, hidden = decoder(inp, hidden)

        # Sample the next character from the network output, treated as a
        # multinomial distribution. Subtracting the largest score before
        # exp() leaves the relative weights unchanged but keeps exp() from
        # overflowing at low temperatures.
        logits = output.data.view(-1).div(temperature)
        output_dist = (logits - logits.max()).exp()
        top_i = int(t.multinomial(output_dist, 1)[0])

        # Add the predicted character to the string and use it as next input
        predicted_char = chr(top_i)
        predicted += predicted_char
        inp = str2tensor(predicted_char)
    return predicted

#### 3. train model

In [None]:
# Train for a given src and target
# It feeds single string to demonstrate seq2seq
# It's extremely slow, and we need to use (1) batch and (2) data parallelism
def train_teacher_forching(line):
    """Run one teacher-forced optimisation step on a single line of text.

    At every position the ground-truth character ``line[c]`` is fed to the
    decoder and the loss is taken against the following character
    ``line[c + 1]``. Uses the notebook-level ``decoder``, ``criterion`` and
    ``decoder_optimizer``.

    Args:
        line: training string.

    Returns:
        The summed loss divided by the number of predicted characters, as a
        Python number. Lines shorter than two characters contain no
        (input, target) pair; they are skipped and 0.0 is returned.
    """
    if len(line) < 2:
        return 0.0

    inputs = str2tensor(line[:-1])
    target = str2tensor(line[1:])

    hidden = decoder.init_hidden()
    loss = 0
    for c in range(len(inputs)):
        output, hidden = decoder(inputs[c], hidden)
        # view(1) gives the target an explicit batch dimension so it lines up
        # with the (1, n_classes) output whether indexing yields a 0-dim or a
        # 1-element tensor.
        loss += criterion(output, target[c].view(1))

    decoder.zero_grad()
    loss.backward()
    decoder_optimizer.step()

    # `loss.data[0]` only works on old PyTorch; newer releases expose .item().
    total = loss.item() if hasattr(loss, 'item') else loss.data[0]
    # Was `len(input)`, which referred to the builtin and raised TypeError.
    return total / len(inputs)
def train(line):
    """Run one optimisation step on a single line without teacher forcing.

    Only the first character of ``line`` is fed from the data; afterwards the
    decoder's own highest-scoring prediction becomes the next input. The loss
    at each step is taken against the true next character. Uses the
    notebook-level ``decoder``, ``criterion`` and ``decoder_optimizer``.

    Args:
        line: training string.

    Returns:
        The summed loss divided by the number of predicted characters, as a
        Python number. Lines shorter than two characters contain no
        (input, target) pair; they are skipped and 0.0 is returned.
    """
    if len(line) < 2:
        return 0.0

    inputs = str2tensor(line[:-1])
    target = str2tensor(line[1:])

    hidden = decoder.init_hidden()
    decoder_in = inputs[0]

    loss = 0
    for c in range(len(inputs)):
        output, hidden = decoder(decoder_in, hidden)
        # view(1) gives the target an explicit batch dimension so it lines up
        # with the (1, n_classes) output whether indexing yields a 0-dim or a
        # 1-element tensor.
        loss += criterion(output, target[c].view(1))
        # Feed the arg-max prediction back in as the next input character.
        decoder_in = output.max(1)[1]

    decoder.zero_grad()
    loss.backward()
    decoder_optimizer.step()

    # `loss.data[0]` only works on old PyTorch; newer releases expose .item().
    total = loss.item() if hasattr(loss, 'item') else loss.data[0]
    return total / len(inputs)

In [None]:
# main
# Build the character-level model; input and output vocabularies are the same.
decoder = RNN(input_size=n_characters,
              hidden_size=hidden_size,
              output_size=n_characters,
              n_layers=n_layers)

if t.cuda.is_available():
    decoder.cuda()

# optimizer and loss function
decoder_optimizer = optim.Adam(decoder.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

train_loader = Data.DataLoader(dataset=TextDataset(),
                               batch_size=batch_size,
                               shuffle=True)
print("Training for %d epochs..." % n_epochs)

# Steps per epoch is the number of batches, not the number of samples; the two
# only coincide when batch_size == 1.
steps_per_epoch = len(train_loader)

for epoch in range(1, n_epochs + 1):
    for i, (lines, _) in enumerate(train_loader):
        # train() handles one string at a time, so walk through every line of
        # the batch instead of silently dropping all but the first one.
        line_losses = [train(line) for line in lines]
        loss = sum(line_losses) / len(line_losses)

        if i % 100 == 0:
            print('[Epoch: (%d %d%%) Step: (%d %d%%) loss: %.4f]' %
                   (epoch, epoch / n_epochs * 100,
                    i, i / steps_per_epoch * 100,
                    loss))
            print(generate(decoder, 'Wh', 100), '\n')


Training for 2 epochs...
[Epoch: (1 50%) Step: (0 0%) loss: 4.8857]
WhI	F{:B4vp 5mmO.8SyBg75W^Dd^^?3>wA5PB3OP"c5)y~:bWm"$=a#ts$#d{6@x0nAn7^P\RI^J 

[Epoch: (1 50%) Step: (100 0%) loss: 3.1914]
Whod,ihehedcwoehcgdsclnteehewselnosnieigeogegest,eifhoma,iinanlshdachcslheeeef:hnne:nne,ht:oeeeadoakna 

[Epoch: (1 50%) Step: (200 0%) loss: 2.9407]
Whteehesdlar,ehoeuefofsoteeavhiedamfiiitfhotnui,se,mmdho,ahaauahsakkteconvtohelusetulheytsonhatowehibo 

[Epoch: (1 50%) Step: (300 0%) loss: 2.9553]
Whoeoewiawioiosdnreiolteeoeaeuoaebltdleaamoo,atheeheholhmaoyooy.ecuiygtiyrloaoeaeewewerridnteltkrsaana 

[Epoch: (1 50%) Step: (400 1%) loss: 3.0961]
Whrrnusmpsesegacerneoektyheetis,eomboeoyae,eefeeeheo4adbc,lteci;,etimeeheotdlofissethleeestgeluunh.eoe 

[Epoch: (1 50%) Step: (500 1%) loss: 2.9875]
Whannhheolmbsdiaass,ewrsmsiatioltetott.resitmethtietihybtaflsotwaee,tnmodseys.iietq,kntdyiyeilmikonaes 

[Epoch: (1 50%) Step: (600 1%) loss: 2.9545]
Whoidcugeehescm,toeeyhtlsyotesia