In [1]:
import string
import random
import torch
import torch.nn as nn
import torch.nn.utils as utils
import matplotlib.pyplot as plt
import timeit

#### Prepare the Dataset

In [2]:
# Vocabulary: every character of string.printable, indexed by its position.
all_chars       = string.printable
n_chars         = len(all_chars)

# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the text is loaded instead of leaving it open.
with open('./shakespeare.txt') as corpus_file:
    file        = corpus_file.read()
file_len        = len(file)

print('Length of file: {}'.format(file_len))
print('All possible characters: {}'.format(all_chars))
print('Number of all possible characters: {}'.format(n_chars))

Length of file: 1115394
All possible characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

Number of all possible characters: 100


In [3]:
# Number of sequences sampled per mini-batch (see get_input_and_target).
batch_size = 64

# Get a random sequence of the Shakespeare dataset.
def get_random_seq(seq_len=128, text=None):
    """Return a random run of ``seq_len + 1`` consecutive characters.

    The extra character lets the caller split the result into an input
    (all but the last character) and a target (all but the first character).

    Args:
        seq_len: length of the input sequence; the returned string is one
            character longer.
        text: string to sample from. Defaults to the global corpus ``file``.

    Returns:
        A string of exactly ``seq_len + 1`` characters.

    Raises:
        ValueError: if ``text`` has fewer than ``seq_len + 1`` characters.
    """
    if text is None:
        text = file

    # random.randint is inclusive on both ends, so the largest valid start is
    # the one whose slice ends exactly at the end of the text. Allowing one
    # more would silently return a sequence that is one character short.
    last_start = len(text) - (seq_len + 1)
    if last_start < 0:
        raise ValueError(
            'text has {} characters but {} are required'.format(len(text), seq_len + 1)
        )

    start_index = random.randint(0, last_start)
    end_index   = start_index + seq_len + 1
    return text[start_index:end_index]

# Convert the sequence to one-hot tensor.
def seq_to_onehot(seq):
    """Encode a batch of strings as a one-hot float tensor.

    Args:
        seq: list of strings, one per batch element. The time dimension is
            taken from ``seq[0]``; a shorter string leaves its remaining time
            steps all-zero and a longer one raises ``IndexError``.

    Returns:
        Tensor of shape ``(len(seq[0]), len(seq), n_chars)`` with a single 1
        per (time step, batch element) at the character's index in
        ``all_chars``.

    Raises:
        ValueError: if a character is not present in ``all_chars``.
    """
    # The batch dimension comes from the argument itself rather than from the
    # global batch_size, so any number of sequences can be encoded.
    tensor = torch.zeros(len(seq[0]), len(seq), n_chars)
    for i, chars in enumerate(seq):
        for t, char in enumerate(chars):
            # One indexing operation instead of three chained ones.
            tensor[t, i, all_chars.index(char)] = 1
    return tensor

# Convert the sequence to index tensor.
def seq_to_index(seq):
    """Encode a batch of strings as a tensor of character indices.

    Args:
        seq: list of strings, one per batch element. The time dimension is
            taken from ``seq[0]``; a shorter string leaves its remaining time
            steps at 0 and a longer one raises ``IndexError``.

    Returns:
        Float tensor of shape ``(len(seq[0]), len(seq))`` holding each
        character's index in ``all_chars``. The values are stored as floats
        (``torch.zeros`` default); the caller converts them with ``.long()``.

    Raises:
        ValueError: if a character is not present in ``all_chars``.
    """
    # The batch dimension comes from the argument itself rather than from the
    # global batch_size, so any number of sequences can be encoded.
    tensor = torch.zeros(len(seq[0]), len(seq))
    for i, chars in enumerate(seq):
        for t, char in enumerate(chars):
            tensor[t, i] = all_chars.index(char)
    return tensor

# Sample a mini-batch including input tensor and target tensor.
def get_input_and_target():
    """Draw ``batch_size`` random sequences and build one training pair.

    Returns:
        A tuple ``(inputs, targets)``: the one-hot encoding of every sequence
        without its last character, and the integer (long) character indices
        of every sequence without its first character. Each target is thus
        the character that follows the corresponding input character.
    """
    batch = [get_random_seq() for _ in range(batch_size)]

    # Inputs and targets are the same text shifted by one position.
    onehot_inputs = seq_to_onehot([chars[:-1] for chars in batch])
    next_char_ids = seq_to_index([chars[1:] for chars in batch]).long()
    return onehot_inputs, next_char_ids

#### Choose a Device

In [4]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)
# 'cuda:0' in the printed output means a GPU was selected.

cuda:0


#### Network Definition

In [5]:
class Net(nn.Module):
    """Character-level LSTM language model.

    A one-hot encoded character sequence is passed through an ``nn.LSTM`` and
    a linear layer that produces one score per vocabulary entry at every time
    step.

    Args:
        input_size: width of the one-hot input vectors. Defaults to the
            global ``n_chars``.
        hidden_size: width of the LSTM hidden state (default 700).
        output_size: number of output scores per time step. Defaults to the
            global ``n_chars``.
        num_layers: number of stacked LSTM layers (default 1).
        batch_size: batch size assumed by ``init_hidden`` (default 64).
    """

    def __init__(self, input_size=None, hidden_size=700, output_size=None,
                 num_layers=1, batch_size=64):
        super(Net, self).__init__()
        # Sizes default to the vocabulary size so that Net() keeps building
        # the same model as before.
        self.input_size  = n_chars if input_size is None else input_size
        self.hidden_size = hidden_size
        self.output_size = n_chars if output_size is None else output_size
        self.num_layers  = num_layers
        self.batch_size  = batch_size

        # Sequence-first layout (batch_first is left at its default, False).
        self.lstm   = nn.LSTM(self.input_size, self.hidden_size, self.num_layers)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden):
        """Run the LSTM over a whole sequence and score every time step.

        Args:
            input: tensor of shape (seq_len, batch, input_size).
            hidden: tuple ``(h_0, c_0)``, each of shape
                (num_layers, batch, hidden_size).

        Returns:
            ``(output, (h_n, c_n))`` where ``output`` has shape
            (seq_len, batch, output_size) and ``(h_n, c_n)`` is the final
            recurrent state.
        """
        out, (hn, cn) = self.lstm(input, hidden)
        # nn.Linear acts on the last dimension only, so the sequence-first
        # layout can be fed in directly; no transpose round-trip is needed.
        output = self.linear(out)
        return output, (hn, cn)

    def init_hidden(self, batch_size=None):
        """Return an all-zero initial state ``(h_0, c_0)``.

        Args:
            batch_size: batch dimension of the state; defaults to
                ``self.batch_size``.

        Returns:
            Tuple of two zero tensors of shape
            (num_layers, batch_size, hidden_size), created on the same device
            as the model parameters.
        """
        if batch_size is None:
            batch_size = self.batch_size
        # Follow the module's own device so the state always matches the
        # weights, wherever the model has been moved.
        param_device = next(self.parameters()).device
        shape = (self.num_layers, batch_size, self.hidden_size)
        h0 = torch.zeros(shape, device=param_device)
        c0 = torch.zeros(shape, device=param_device)
        return (h0, c0)

    
# Build the network and place its parameters on the selected device
# (nn.Module.to returns the module itself, so the two steps chain).
netOG = Net().to(device)
# Total number of parameters; shown as the cell output.
sum(param.numel() for param in netOG.parameters())

2315700

#### Training Step and Evaluation Step

In [6]:
def train_step(net, opt, input, target):
    """Perform one optimisation step on a mini-batch.

    Args:
        net: model providing ``init_hidden()``, ``output_size`` and a forward
            pass that returns ``(output, hidden)``.
        opt: optimizer over ``net``'s parameters.
        input: input tensor passed to ``net``.
        target: tensor of target class indices.

    Returns:
        The mini-batch loss as a Python float.

    Note:
        The loss function is read from the global ``loss_func``, which must
        be defined before this function is called.
    """
    # Fresh recurrent state and cleared gradients for this batch.
    initial_state = net.init_hidden()
    opt.zero_grad()

    # Forward pass; the final recurrent state is not needed for training.
    logits, _ = net(input, initial_state)

    # Collapse the leading dimensions so every position becomes one
    # classification example: (positions, classes) against (positions,).
    flat_logits  = logits.reshape(-1, net.output_size)
    flat_targets = target.reshape(-1)
    batch_loss   = loss_func(flat_logits, flat_targets)

    # Back-propagate and update the weights.
    batch_loss.backward()
    opt.step()

    return batch_loss.item()

In [7]:
# Evaluation step function.
def eval_step(net, init_seq='W', predicted_len=100):
    """Generate text from the model, one sampled character at a time.

    Args:
        net: trained model providing ``init_hidden()``, ``batch_size`` and a
            forward pass that returns ``(output, hidden)``.
        init_seq: non-empty seed string; it primes the recurrent state and
            starts the returned text.
        predicted_len: number of characters to generate after the seed.

    Returns:
        ``init_seq`` followed by ``predicted_len`` sampled characters.

    Raises:
        ValueError: if ``init_seq`` is empty.
    """
    if not init_seq:
        raise ValueError('init_seq must contain at least one character')

    # Remember the current mode so it can be restored afterwards, instead of
    # unconditionally switching the model back to training mode.
    was_training = net.training
    net.eval()
    try:
        # Generation needs no gradients; without no_grad every step would
        # extend an autograd graph through the carried hidden state.
        with torch.no_grad():
            # The seed is replicated net.batch_size times to match the state
            # returned by init_hidden; only batch element 0 is sampled from.
            hidden        = net.init_hidden()
            init_input    = seq_to_onehot([init_seq] * net.batch_size).to(device)
            predicted_seq = init_seq

            # Use all but the last seed character to "build up" the hidden state.
            for t in range(len(init_seq) - 1):
                # The network expects (seq_len, batch_size, n_chars).
                _, hidden = net(init_input[t].unsqueeze(0), hidden)

            # The last seed character is the first input of the sampling loop.
            step_input = init_input[-1].unsqueeze(0)

            for _ in range(predicted_len):
                output, hidden = net(step_input, hidden)

                # The network emits unnormalised scores. softmax gives the same
                # distribution as normalising exp(scores) but cannot overflow.
                probs = torch.softmax(output[0, 0, :], dim=0)
                predicted_index = torch.multinomial(probs, 1).item()

                # Append the sampled character and feed it back as next input.
                predicted_char  = all_chars[predicted_index]
                predicted_seq  += predicted_char

                # A one-character string encodes to (1, batch_size, n_chars).
                step_input = seq_to_onehot([predicted_char] * net.batch_size).to(device)
    finally:
        net.train(was_training)

    return predicted_seq

#### Training Procedure

In [8]:
# Training schedule.
# NOTE: You may reduce the number of training iterations if the training takes long.
iters       = 10000  # Total number of mini-batch updates.
print_iters = 100    # Log (and sample text) once every this many updates.

# Loss bookkeeping: loss_sum accumulates inside the current logging window,
# all_losses keeps one averaged value per completed window.
loss_sum   = 0
all_losses = []

# Cross-entropy loss, optimised with Adam.
loss_func = nn.CrossEntropyLoss()
opt       = torch.optim.Adam(netOG.parameters(), lr=0.005)

start_train = timeit.default_timer()

# Training procedure.
for i in range(iters):
    # Sample a mini-batch and move both tensors to the compute device.
    # Author's shape notes: input (128, 64, 100), target (128, 64),
    # network output (128, 64, 100).
    input, target = (tensor.to(device) for tensor in get_input_and_target())

    loss      = train_step(netOG, opt, input, target)
    loss_sum += loss

    # At the end of every logging window: report the mean loss, show a
    # generated sample, record the mean and start a new window.
    if (i + 1) % print_iters == 0:
        window_loss = loss_sum / print_iters
        print('iter:{}/{} loss:{}'.format(i, iters, window_loss))
        print('generated sequence: {}\n'.format(eval_step(netOG)))

        all_losses.append(window_loss)
        loss_sum = 0

end_train = timeit.default_timer()
print ("Training time elapsed:", end_train - start_train, "s")

In [9]:
#torch.save(netOG.state_dict(), 'VanillaRNN_params_ -layers  -700.pth')

### with 3 layers, hidden 500, train time was 1662s, final loss was 2.290
### with 3 layers, hidden 300, train time was 1534s, final loss was 0.741
# with 3 layers, hidden 200, train time was 1440s, final loss was 1.160
# with 3 layers, hidden 100, train time was 1287s, final loss was 1.376

### with 2 layers, hidden 700, train time was 1714s, final loss was 0.740
### with 2 layers, hidden 500, train time was 1612s, final loss was 0.794
# with 2 layers, hidden 300, train time was 1393s, final loss was 0.856
# with 2 layers, hidden 200, train time was 1256s, final loss was 1.107
# with 2 layers, hidden 100, train time was 1256s, final loss was 1.324

### with 1 layer, hidden 700, train time was 2321s, final loss was 0.797
# with 1 layer, hidden 500, train time was 1547s, final loss was 0.928


# 1 layer?
# hidden 200? hidden 700?
# new batch size?
# more randomness in training?

In [10]:
# import pickle

# with open('VanillaRNN_lossList_ -layers h- _ .pkl', 'wb') as file:
#     pickle.dump(all_losses, file)

#### Training Loss Curve

In [11]:
# Each entry of all_losses is the mean loss over one logging window of
# print_iters training iterations. Plot it at the iteration count where that
# window ended so the 'iters' axis shows real iteration numbers rather than
# the index of the logged point.
logged_at = [(k + 1) * print_iters for k in range(len(all_losses))]

plt.xlabel('iters')
plt.ylabel('loss')
plt.plot(logged_at, all_losses)
plt.show()

#### Evaluation: A Sample of Generated Sequence

In [12]:
# Restore the trained weights. map_location remaps tensors that were saved on
# another device (e.g. a GPU checkpoint opened on a CPU-only machine) onto the
# device selected earlier in this notebook.
# NOTE(review): torch.load unpickles the file, so only open checkpoints you trust.
netOG.load_state_dict(torch.load('VanillaRNN_params_1-layers h-700.pth', map_location=device))

<All keys matched successfully>

In [13]:
# Generate 300 characters from the trained model, seeded with 'W'.
sample_300 = eval_step(netOG, init_seq='W', predicted_len=300)
print(sample_300)

With all my heart for a kinsman follow.

PRINCE EDWARD:
And have the lusty service and no bight,
To use the best end itself ring for me
To speak to the blind of the wind word Hastings
That that no beastly swope the imach of York
Ulusugn their loves; and now, that this took him by
sal than this man th


In [14]:
# Generate 500 characters from the trained model, seeded with 'W'.
sample_500 = eval_step(netOG, init_seq='W', predicted_len=500)
print(sample_500)

Warwick was too sudden on the field,
The night is all as of that feasting wrench
Hath won to hear of Naples? woe, the gods?
What holy time the law of troops doth make speech,
That think upon the maid you were a grand:
Whater hast thou, or where id you? Hast thou advised
Thine calm this while to'll prophet o'er his honour:
Good goodness are not fair, and he are but tent.
Stay, I pray, sir,--gend me alone, but thou
shalt not piece me in request,
I doubt not buzz the plainest way: faction!'
Blasting


In [15]:
# Generate 1000 characters from the trained model, seeded with 'W'.
sample_1000 = eval_step(netOG, init_seq='W', predicted_len=1000)
print(sample_1000)

We shall be no more inme: who, by the yannes
That Romeo must be leave till now deliver'd
Friar welcomed, or madam with him
And we do bud the wanton all understand.
Come, that envious through the dignity than my knight.

ROMEO:
And bire your conditions!

CAMILLO:
Shy hard;
His other this, is it for a kinst mother
To be three shrew-back than thou hast; this self-will ill.

AUTOLYCUS:
When fare, sir, I come to the feat of the air,--for chresion
I now drink to-morrow, come from them, and reason
In cold corse a Roman, that any botty on
me to do remove:
Opp gone! Now foretel some saint illess your children's son
And three or four and as lift as high blood,
The lininess will not have put your noble
That seest a piece of door and honour now,
In brief with that dreams, and all in Ravenspurgh,
That instance to this thing you gle tears to take
no more than a youngerier, makes thee what a chantelioy movest.

VIRGILIA:
Sir, do you hear?

Second Citizen:
No, no soverejeal: the rest of those jealousi

In [16]:
### 2-layers, h-500
# Why have you raised?

# LEONTES:
# Thou hast broke for revenge and all,
# You promised wept in happy days,
# As our be not permity's a prophecy
# Doth shame to take some though gallows from the air
# And bring it with a fury to report him.

# LARTIUS:
# These dieds and truth to flame as scarlet's reporter.

# Officer:
# You may shall posize.

# LEONTES:
# What news, preposterous craft
# To found this Perditaon: hook, wouldst not spool,
# Under your charity to the house of Knebl.
# I come to hence with other in thy beads!
# For she's my squife rivers from my banishment.

# QUEEN ELIZABETH:
# Tut, tumper; what of that?
# You are retired, I would this put in stone:
# The world vouchs of stud of death must have them.

# TRANIO:
# Sir, look sweet, 'tis the fine a tyrant for Henry's
# apposition. We are you, repair of court? bootless labour
# Will have you both by honour shall be so:
# I have a daughter where youtht of hide herself
# What doth he be an arch-reding-back, the Moltag!

# DORSET:
# This wish, my tears are stop amorous farm
# And with all her play'd that monstrous room,
# Some virtuous syralt ought to the drowsing brood;
# Who is't being such fal as last of informon.

# Second Gentleman:
# How now, sir! Away son will not work out?
# For we arriven, which to do it.

# GRUMIO:
# Therefore lend this deed. Excover is here and mine,
# Put up in scarling admiring, yet with spicitude,
# Expede the proclamation: on our encouragement
# Leaving argus he under his battle stoop
# A word with joyful in his fall on us:
# Come, go with us; like one safety for our priest
# How merity shall be what lay had shorter
# Renonned in the chiver men are possection
# That no man enter in this presence of their ears:
# Doose shore recount him will she never sing is very
# That shots in silver dregs of such a price.

# PAULINA:
# None, mother, I hear, Juliet, be not report;
# Lucentio met! make me with thee women.

# MERCUTIO:
# Be thore: betwixt of love!

# DUCHESS OF YORK:
# This is the urbike of him.

# A Patrician:
# Nay, doth he do wear by him, for he that mightst
# me still would tell who firs