In [1]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as f
import _pickle as cPi
import random

from data import *
from data_utils import *

In [2]:
####### prepare data #######
# Number of poems drawn from the full corpus for training.
TRAIN_SAMPLE_SIZE = 2560
# Fixed seed so the same training subset is drawn on every run.
SAMPLE_SEED = 0

poems_all = poem_loader()

# dictionary for words: every distinct element seen while iterating the poems
# gets the next free integer index (first-seen order).
word_to_idx = {}
for poem in poems_all:
    for word in poem:
        # len() is evaluated before insertion, so a new word gets the next index
        word_to_idx.setdefault(word, len(word_to_idx))
# special tokens are appended after the corpus vocabulary
word_to_idx['<EOP>'] = len(word_to_idx)
word_to_idx['<START>'] = len(word_to_idx)

# Optional: persist the vocabulary.
# NOTE(review): the generation cell loads './data_set/wordDic' - confirm that
# file was produced from this same dictionary before relying on it.
#with open('wordDic', 'wb') as file:
    #cPi.dump(word_to_idx, file)

print(len(poems_all))

# Draw the training subset with a dedicated, seeded RNG (the global `random`
# state is left untouched); never ask for more poems than exist.
sampled_poems = random.Random(SAMPLE_SEED).sample(
    poems_all, min(TRAIN_SAMPLE_SIZE, len(poems_all)))

# change poem sentences into lists
# poems is a list of list, each terminated by the end-of-poem marker
poems = []
for raw_poem in sampled_poems:
    tokens = toList(raw_poem)
    tokens.append("<EOP>")
    poems.append(tokens)


254225


In [3]:
# Model
class PoetryModel(nn.Module):
    """Embedding -> LSTM -> linear classifier over the vocabulary.

    The model processes one sequence at a time: ``forward`` reshapes its input
    to ``(length, 1, embedding_dim)``, i.e. a batch dimension of 1.

    Args:
        vocab_size: number of distinct tokens (embedding rows / output scores).
        embedding_dim: size of each token embedding.
        hidden_dim: size of the LSTM hidden and cell state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers):
        super(PoetryModel, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers)
        self.linear = nn.Linear(hidden_dim, vocab_size)

        self.hidden_dim = hidden_dim
        # Remembered so hidden_initial() can size the state for stacked LSTMs.
        self.num_layers = num_layers

    def forward(self, x, h):
        """Score the next token for every position of one sequence.

        Args:
            x: 1-D tensor of token indices.
            h: ``(h_0, c_0)`` tuple as returned by ``hidden_initial``.

        Returns:
            ``(out, (h, c))`` where ``out`` has shape ``(len(x), vocab_size)``
            and ``(h, c)`` is the final LSTM state.
        """
        length = x.size(0)
        embeds = self.embed(x).view(length, 1, -1)
        out, (h, c) = self.lstm(embeds, h)
        # Apply a non-linearity at the final classifier.
        # NOTE(review): ReLU clamps every negative score to 0 before the scores
        # are fed to CrossEntropyLoss / topk. It is kept so that existing
        # checkpoints behave exactly as before - consider removing it and
        # retraining.
        out = f.relu(self.linear(out.view(length, -1)))

        return out, (h, c)

    # Covers both the initial hidden state h and the initial cell state c.
    def hidden_initial(self, length=None):
        """Return zero-filled ``(h_0, c_0)`` for a batch of one sequence.

        Args:
            length: size of the leading (layer) dimension. Defaults to the
                model's ``num_layers`` so the state always matches the LSTM;
                previously it defaulted to 1, which only worked for
                single-layer models.

        Returns:
            Tuple of two tensors of shape ``(length, 1, hidden_dim)`` created
            with the same dtype and device as the model's parameters.
        """
        if length is None:
            length = self.num_layers
        reference = self.linear.weight
        shape = (length, 1, self.hidden_dim)
        return (torch.zeros(shape, dtype=reference.dtype, device=reference.device),
                torch.zeros(shape, dtype=reference.dtype, device=reference.device))
        
        

In [4]:
# training
# Hyper-parameters
learning_rate = 1e-3
num_epochs = 8
batch_size = 64
vocab_size = len(word_to_idx)
embed_dim = 256
hidden_dim = 1024
num_layers = 1

model = PoetryModel(vocab_size, embed_dim, hidden_dim, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

TRAINSIZE = len(poems)
# Ceiling division: a trailing partial batch is trained on instead of being
# silently dropped (floor division made the old min(...) guard unreachable).
num_batches = (TRAINSIZE + batch_size - 1) // batch_size
for epoch in range(num_epochs):
    for batchIndex in range(num_batches):
        # Slicing clips at the end of the list, so the last batch may be short.
        batch = poems[batchIndex * batch_size:(batchIndex + 1) * batch_size]
        optimizer.zero_grad()
        loss = 0
        for s in batch:
            x, o = in_and_out(s, word_to_idx)
            # Each poem starts from a fresh zero state.
            hidden = model.hidden_initial()
            output, hidden = model(x, hidden)
            loss += criterion(output, o)
        # Average the per-poem losses so the step size does not depend on
        # how many poems the batch holds.
        loss = loss / len(batch)
        loss.backward()
        optimizer.step()
        if (batchIndex + 1) % 10 == 0:
            print ('Epoch[{}/{}], Batch:{}, Loss:{:.4f}'.format(epoch+1, num_epochs,batchIndex+1, loss.item()))

torch.save(model.state_dict(), 'PoetryModel.ckpt')

Epoch[1/8], Batch:10, Loss:7.3877
Epoch[1/8], Batch:20, Loss:7.2987
Epoch[1/8], Batch:30, Loss:7.1663
Epoch[1/8], Batch:40, Loss:7.1971
Epoch[2/8], Batch:10, Loss:7.0004
Epoch[2/8], Batch:20, Loss:7.0375
Epoch[2/8], Batch:30, Loss:6.9349
Epoch[2/8], Batch:40, Loss:6.9830
Epoch[3/8], Batch:10, Loss:6.7754
Epoch[3/8], Batch:20, Loss:6.8300
Epoch[3/8], Batch:30, Loss:6.7134
Epoch[3/8], Batch:40, Loss:6.7984
Epoch[4/8], Batch:10, Loss:6.6064
Epoch[4/8], Batch:20, Loss:6.6797
Epoch[4/8], Batch:30, Loss:6.5872
Epoch[4/8], Batch:40, Loss:6.6701
Epoch[5/8], Batch:10, Loss:6.4892
Epoch[5/8], Batch:20, Loss:6.5677
Epoch[5/8], Batch:30, Loss:6.5005
Epoch[5/8], Batch:40, Loss:6.5874
Epoch[6/8], Batch:10, Loss:6.3949
Epoch[6/8], Batch:20, Loss:6.4755
Epoch[6/8], Batch:30, Loss:6.4027
Epoch[6/8], Batch:40, Loss:6.5021
Epoch[7/8], Batch:10, Loss:6.3036
Epoch[7/8], Batch:20, Loss:6.3821
Epoch[7/8], Batch:30, Loss:6.3232
Epoch[7/8], Batch:40, Loss:6.4101
Epoch[8/8], Batch:10, Loss:6.2082
Epoch[8/8], Ba

In [68]:
# generating

# Rebuild the architecture and restore the weights saved by the training cell.
model_test = PoetryModel(vocab_size, embed_dim, hidden_dim, num_layers)
model_test.load_state_dict(torch.load('PoetryModel.ckpt'))
model_test.eval()  # inference only

# Load the persisted vocabulary; the context manager closes the file handle.
# NOTE: unpickling can execute arbitrary code - only load files you trust.
# NOTE(review): this replaces the word_to_idx built earlier in the notebook;
# confirm the pickled dictionary is the one the checkpoint was trained with.
with open('./data_set/wordDic', 'rb') as dict_file:
    word_to_idx = cPi.load(dict_file)
# Reverse mapping used to turn predicted indices back into words.
idx_to_word = {idx: word for word, idx in word_to_idx.items()}
# Upper bound on the number of generated tokens per poem.
max_length = 50

def PoetryGenerator(startword = '<START>', model = None):
    """Greedily generate a poem, one token at a time.

    Starting from ``startword``, the model's highest-scoring next token is
    appended repeatedly until ``'<EOP>'`` is predicted or ``max_length``
    tokens have been produced.

    Args:
        startword: first token fed to the model. It is included in the
            returned text unless it is the ``'<START>'`` marker.
        model: model to sample from; defaults to the module-level
            ``model_test``. It must provide ``hidden_initial()`` and be
            callable as ``model(x, hidden) -> (scores, hidden)``.

    Returns:
        The generated poem as a string (without the ``'<EOP>'`` marker).

    Raises:
        KeyError: if ``startword`` is not in ``word_to_idx``.
    """
    if model is None:
        model = model_test
    if startword not in word_to_idx:
        raise KeyError('start word {!r} is not in the vocabulary'.format(startword))

    out_poem = '' if startword == '<START>' else startword
    with torch.no_grad():
        x = torch.tensor([word_to_idx[startword]])
        hidden = model.hidden_initial()
        for _ in range(max_length):
            out, hidden = model(x, hidden)
            # Greedy decoding: keep only the index of the top-scoring token.
            _, topi = out.topk(1)
            idx = topi[0][0].item()
            w = idx_to_word[idx]
            if w == '<EOP>':
                break
            out_poem += w
            # Feed the prediction back in as the next input.
            x = torch.tensor([idx])
    return out_poem
        

# Generate one sample poem per start character.
for start_char in ('雨', '一', '昊', '夜'):
    print(PoetryGenerator(start_char))

雨暗無人間，不見不可。
一株風雨一聲，一點不可憐。
昊東風流水，不見不可。
夜凉人間不可憐，一點不見不可憐。


# 4 more epochs   
Since the second parts of the sentences didn't make any sense, and the loss above was still decreasing,   
I trained the model for 4 more epochs.

In [74]:
# Hyper-parameters for the continued-training run
learning_rate_c = 1e-3
num_epochs_c = 4
batch_size_c = 64
vocab_size_c = len(word_to_idx)
embed_dim_c = 256
hidden_dim_c = 1024
num_layers_c = 1

# Resume from the weights saved by the first training run.
# Only the model weights are restored; the Adam optimizer below starts with
# fresh state.
model_c = PoetryModel(vocab_size_c, embed_dim_c, hidden_dim_c, num_layers_c)
model_c.load_state_dict(torch.load('PoetryModel.ckpt'))
criterion_c = nn.CrossEntropyLoss()
optimizer_c = optim.Adam(model_c.parameters(), lr=learning_rate_c)

TRAINSIZE = len(poems)
# Ceiling division: a trailing partial batch is trained on instead of being
# silently dropped.
num_batches_c = (TRAINSIZE + batch_size_c - 1) // batch_size_c
for epoch in range(num_epochs_c):
    for batchIndex in range(num_batches_c):
        # Slicing clips at the end of the list, so the last batch may be short.
        batch_c = poems[batchIndex * batch_size_c:(batchIndex + 1) * batch_size_c]
        optimizer_c.zero_grad()
        loss_c = 0
        for s in batch_c:
            x, o = in_and_out(s, word_to_idx)
            # Each poem starts from a fresh zero state.
            hidden = model_c.hidden_initial()
            output, hidden = model_c(x, hidden)
            # Use this cell's own criterion_c (the original used `criterion`
            # left over from the first training cell).
            loss_c += criterion_c(output, o)
        loss_c = loss_c / len(batch_c)
        loss_c.backward()
        optimizer_c.step()
        if (batchIndex + 1) % 10 == 0:
            print ('Epoch[{}/{}], Batch:{}, Loss:{:.4f}'.format(epoch+1, num_epochs_c, batchIndex+1, loss_c.item())) 

torch.save(model_c.state_dict(), 'PoetryModel_c.ckpt')

Epoch[1/4], Batch:10, Loss:6.1919
Epoch[1/4], Batch:20, Loss:6.2261
Epoch[1/4], Batch:30, Loss:6.1789
Epoch[1/4], Batch:40, Loss:6.3158
Epoch[2/4], Batch:10, Loss:6.0517
Epoch[2/4], Batch:20, Loss:6.1191
Epoch[2/4], Batch:30, Loss:6.0789
Epoch[2/4], Batch:40, Loss:6.2227
Epoch[3/4], Batch:10, Loss:5.9651
Epoch[3/4], Batch:20, Loss:6.0366
Epoch[3/4], Batch:30, Loss:5.9952
Epoch[3/4], Batch:40, Loss:6.1391
Epoch[4/4], Batch:10, Loss:5.8850
Epoch[4/4], Batch:20, Loss:5.9484
Epoch[4/4], Batch:30, Loss:5.9128
Epoch[4/4], Batch:40, Loss:6.0594


In [99]:
# Rebuild the network with the same *_c hyper-parameters that model_c (the
# model saved to 'PoetryModel_c.ckpt') was constructed with.
model_c_test = PoetryModel(vocab_size_c, embed_dim_c, hidden_dim_c, num_layers_c)
model_c_test.load_state_dict(torch.load('PoetryModel_c.ckpt'))
model_c_test.eval()  # inference only

def PoetryGenerator2(startword = '<START>', model = None):
    """Greedily generate a poem with the further-trained model.

    Starting from ``startword``, the model's highest-scoring next token is
    appended repeatedly until ``'<EOP>'`` is predicted or ``max_length``
    tokens have been produced.

    Args:
        startword: first token fed to the model. It is included in the
            returned text unless it is the ``'<START>'`` marker.
        model: model to sample from; defaults to the module-level
            ``model_c_test``. It must provide ``hidden_initial()`` and be
            callable as ``model(x, hidden) -> (scores, hidden)``.

    Returns:
        The generated poem as a string (without the ``'<EOP>'`` marker).

    Raises:
        KeyError: if ``startword`` is not in ``word_to_idx``.
    """
    if model is None:
        model = model_c_test
    if startword not in word_to_idx:
        raise KeyError('start word {!r} is not in the vocabulary'.format(startword))

    out_poem = '' if startword == '<START>' else startword
    with torch.no_grad():
        x = torch.tensor([word_to_idx[startword]])
        hidden = model.hidden_initial()
        for _ in range(max_length):
            out, hidden = model(x, hidden)
            # Greedy decoding: keep only the index of the top-scoring token.
            _, topi = out.topk(1)
            idx = topi[0][0].item()
            w = idx_to_word[idx]
            if w == '<EOP>':
                break
            out_poem += w
            # Feed the prediction back in as the next input.
            x = torch.tensor([idx])
    return out_poem
        
# Generate one sample poem per start character with the further-trained model.
for start_char in ('雨', '一', '林', '翊'):
    print(PoetryGenerator2(start_char))

雨暗無人管，不知此日之。
一徑山水抱城流，一點春風雨露香。
林木蕭條一點春，一時來時有餘生。
翊風吹雨過，天下有新詩。


# The result seems much better!   
As the loss still seems to be decreasing, you can continue training for more epochs and see whether a better result can be obtained.      
It can be a little time-consuming if you run on a CPU (about 1h20min for one epoch :( ).     
The last character of my name is "翊" (which can represent the sound of birds flying), and the last sentence says that when it ("翊風") blows and rains, it brings new poems.     
It seems that the machine has even learned who wrote the poem generator program! Just kidding :)